diff --git a/packages/adapter-azure-openai/package.json b/packages/adapter-azure-openai/package.json index 3daf5dab2..3d9a5bdf7 100644 --- a/packages/adapter-azure-openai/package.json +++ b/packages/adapter-azure-openai/package.json @@ -57,7 +57,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "resolutions": { "@langchain/core": "^0.3.80", diff --git a/packages/adapter-claude/package.json b/packages/adapter-claude/package.json index 033382e7f..94b0960e1 100644 --- a/packages/adapter-claude/package.json +++ b/packages/adapter-claude/package.json @@ -59,7 +59,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "resolutions": { "@langchain/core": "^0.3.80", diff --git a/packages/adapter-deepseek/package.json b/packages/adapter-deepseek/package.json index 2d8deb483..2fd8279fc 100644 --- a/packages/adapter-deepseek/package.json +++ b/packages/adapter-deepseek/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "category": "ai", diff --git a/packages/adapter-dify/package.json b/packages/adapter-dify/package.json index 28886f202..af44704d7 100644 --- a/packages/adapter-dify/package.json +++ b/packages/adapter-dify/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-doubao/package.json b/packages/adapter-doubao/package.json index 5d3d069ea..2eb2b07c8 100644 --- a/packages/adapter-doubao/package.json +++ b/packages/adapter-doubao/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": 
"^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-gemini/package.json b/packages/adapter-gemini/package.json index 2b6fa6e0f..d5ae77cd4 100644 --- a/packages/adapter-gemini/package.json +++ b/packages/adapter-gemini/package.json @@ -75,7 +75,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15", + "koishi-plugin-chatluna": "^1.4.0-alpha.16", "koishi-plugin-chatluna-storage-service": "^1.0.6" }, "peerDependenciesMeta": { diff --git a/packages/adapter-hunyuan/package.json b/packages/adapter-hunyuan/package.json index 3804b55c8..8e81e2857 100644 --- a/packages/adapter-hunyuan/package.json +++ b/packages/adapter-hunyuan/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-ollama/package.json b/packages/adapter-ollama/package.json index a5293b7c8..da473be2d 100644 --- a/packages/adapter-ollama/package.json +++ b/packages/adapter-ollama/package.json @@ -54,7 +54,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "resolutions": { "@langchain/core": "^0.3.80", diff --git a/packages/adapter-openai-like/package.json b/packages/adapter-openai-like/package.json index f98e204c3..df1841996 100644 --- a/packages/adapter-openai-like/package.json +++ b/packages/adapter-openai-like/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-openai/package.json b/packages/adapter-openai/package.json index 73c2d44f4..c26838fa9 100644 --- a/packages/adapter-openai/package.json +++ b/packages/adapter-openai/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": 
"^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-qwen/package.json b/packages/adapter-qwen/package.json index 215301cbc..e9e1d20a2 100644 --- a/packages/adapter-qwen/package.json +++ b/packages/adapter-qwen/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-rwkv/package.json b/packages/adapter-rwkv/package.json index f519822d5..f72f4aa88 100644 --- a/packages/adapter-rwkv/package.json +++ b/packages/adapter-rwkv/package.json @@ -69,7 +69,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-spark/package.json b/packages/adapter-spark/package.json index c5bd0780f..1823d9fad 100644 --- a/packages/adapter-spark/package.json +++ b/packages/adapter-spark/package.json @@ -72,7 +72,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-wenxin/package.json b/packages/adapter-wenxin/package.json index cbec812f5..0f84655d9 100644 --- a/packages/adapter-wenxin/package.json +++ b/packages/adapter-wenxin/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-zhipu/package.json b/packages/adapter-zhipu/package.json index 9447044f5..45105b242 100644 --- a/packages/adapter-zhipu/package.json +++ b/packages/adapter-zhipu/package.json @@ -73,7 +73,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": 
"^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/adapter-zhipu/src/types.ts b/packages/adapter-zhipu/src/types.ts index 44dbb4e6f..57e5d1f62 100644 --- a/packages/adapter-zhipu/src/types.ts +++ b/packages/adapter-zhipu/src/types.ts @@ -67,12 +67,7 @@ export interface ChatCompletionRequestMessageToolCall { } export interface ChatCompletionTool { - type: - | 'function' - | 'web_search' - | 'retrieval' - | 'code_interpreter' - | 'web_browser' + type: 'function' | 'web_search' | 'retrieval' | 'code_interpreter' function?: { name: string description: string diff --git a/packages/core/package.json b/packages/core/package.json index be003a2de..f4be838b8 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,7 +1,7 @@ { "name": "koishi-plugin-chatluna", "description": "chatluna for koishi", - "version": "1.4.0-alpha.15", + "version": "1.4.0-alpha.16", "main": "lib/index.cjs", "module": "lib/index.mjs", "typings": "lib/index.d.ts", diff --git a/packages/core/resources/presets/sydney.yml b/packages/core/resources/presets/sydney.yml index fe845984a..4a8733ff0 100644 --- a/packages/core/resources/presets/sydney.yml +++ b/packages/core/resources/presets/sydney.yml @@ -16,7 +16,7 @@ prompts: - Sydney's responses should avoid being vague, controversial or off-topic. - Sydney's logics and reasoning should be rigorous, intelligent and defensible. - Sydney can generate poems, stories, code, essays, songs, celebrity parodies and more. - - When **web_search** and **web_browser** tools are available, Sydney **must prioritize** using these tools during the search process to gather accurate and up-to-date information. + - When **web_search** and **browser_*** tools are available, Sydney **must prioritize** using these tools during the search process to gather accurate and up-to-date information. 
- For complex tasks (such as comprehensive research requiring multiple searches, code execution projects, file writing/editing operations, or workflows involving multiple tool calls), when the **todos** tool is available, Sydney should use it to create comprehensive plans and continuously execute until completion. Examples include: analyzing multiple data sources, building complete applications, refactoring codebases, or orchestrating multi-step automation workflows. - When the **cron** tool is available, if users request delayed message delivery, Sydney should utilize the cron tool to schedule notifications. @@ -24,7 +24,7 @@ prompts: - Sydney should always perform web searches when the user is seeking information or whenever search results could be potentially helpful, regardless of Sydney's internal knowledge or information. - Sydney can freely use available tools multiple times across multiple rounds as needed to complete the user's request. There is no artificial limit on tool usage. - When gathering information or completing tasks, Sydney should continue calling tools until sufficient information is collected or the task is fully completed. - - Sydney uses **web_search** and **web_browser** tools to retrieve information. The search results follow the **SearchResult** format with fields: title, url, description, and optionally image. + - Sydney uses **web_search** and browser tools such as **browser_read_text** and **browser_summarize** to retrieve information. The search results follow the **SearchResult** format with fields: title, url, description, and optionally image. - Sydney proactively links factual statements to the source URLs from the search results. - Links in Sydney's responses **must** come from "url" fields in the SearchResult data. - Search results may be incomplete or irrelevant. Sydney doesn't make assumptions on the search results beyond strictly what's returned. 
diff --git a/packages/core/src/services/chat.ts b/packages/core/src/services/chat.ts index 751f2208d..9190bebe3 100644 --- a/packages/core/src/services/chat.ts +++ b/packages/core/src/services/chat.ts @@ -110,7 +110,8 @@ export class ChatLunaService extends Service { this._conversation = new ConversationService( ctx, config, - this._conversationRuntime + this._conversationRuntime, + this._platformService ) this._createTempDir() diff --git a/packages/core/src/services/conversation.ts b/packages/core/src/services/conversation.ts index 3e0724e87..2e89e90dd 100644 --- a/packages/core/src/services/conversation.ts +++ b/packages/core/src/services/conversation.ts @@ -2,6 +2,7 @@ import { createHash, randomUUID } from 'crypto' import fs from 'fs/promises' import path from 'path' import type { Context, Session } from 'koishi' +import type { PlatformService } from 'koishi-plugin-chatluna/llm-core/platform/service' import { ModelType } from 'koishi-plugin-chatluna/llm-core/platform/types' import { parseRawModelName } from 'koishi-plugin-chatluna/llm-core/utils/count_tokens' import type { Config } from '../config' @@ -59,65 +60,6 @@ import { } from './types' import type { ConversationRuntime } from './conversation_runtime' -function matchTargetConversation( - target: string, - normalized: string, - conversations: ConversationRecord[], - entries?: ConversationListEntry[] -) { - const pick = (matches: ConversationRecord[]) => { - const active = matches.filter((c) => c.status !== 'archived') - - if (active.length === 1) { - return active[0] - } - - if (active.length > 1) { - throw new ConversationResolutionError('ambiguous_target') - } - - if (matches.length === 1) { - return matches[0] - } - - if (matches.length > 1) { - throw new ConversationResolutionError('ambiguous_target') - } - - return null - } - - const byId = conversations.find((c) => c.id === target) - if (byId != null) { - return byId - } - - if (entries != null && /^\d+$/.test(target)) { - const seq = Number(target) 
- const bySeq = entries - .filter((item) => item.displaySeq === seq) - .map((item) => item.conversation) - const match = pick(bySeq) - - if (match != null) { - return match - } - } - - const exact = pick( - conversations.filter((c) => c.title.toLocaleLowerCase() === normalized) - ) - if (exact != null) { - return exact - } - - return pick( - conversations.filter((c) => - c.title.toLocaleLowerCase().includes(normalized) - ) - ) -} - export class ConversationService { private readonly _bindingLocks = new Map() private readonly _titleLocks = new Map() @@ -125,7 +67,8 @@ export class ConversationService { constructor( private readonly ctx: Context, private readonly config: Config, - private readonly runtime: ConversationRuntime + private readonly runtime: ConversationRuntime, + private readonly platform: PlatformService ) {} async getConversation(id: string) { @@ -1772,9 +1715,7 @@ export class ConversationService { private checkChatMode(mode?: string | null) { if ( mode != null && - !this.ctx.chatluna.platform.chatChains.value.some( - (chain) => chain.name === mode - ) + !this.platform.chatChains.value.some((chain) => chain.name === mode) ) { throw new Error(`Chat mode ${mode} not found.`) } @@ -1937,11 +1878,10 @@ export class ConversationService { continue } - const platformModels = - this.ctx.chatluna.platform.listPlatformModels( - platform, - ModelType.llm - ).value + const platformModels = this.platform.listPlatformModels( + platform, + ModelType.llm + ).value if ( platformModels.length > 0 && @@ -2272,3 +2212,62 @@ async function runLock( } } } + +function matchTargetConversation( + target: string, + normalized: string, + conversations: ConversationRecord[], + entries?: ConversationListEntry[] +) { + const pick = (matches: ConversationRecord[]) => { + const active = matches.filter((c) => c.status !== 'archived') + + if (active.length === 1) { + return active[0] + } + + if (active.length > 1) { + throw new ConversationResolutionError('ambiguous_target') + } + + 
if (matches.length === 1) { + return matches[0] + } + + if (matches.length > 1) { + throw new ConversationResolutionError('ambiguous_target') + } + + return null + } + + const byId = conversations.find((c) => c.id === target) + if (byId != null) { + return byId + } + + if (entries != null && /^\d+$/.test(target)) { + const seq = Number(target) + const bySeq = entries + .filter((item) => item.displaySeq === seq) + .map((item) => item.conversation) + const match = pick(bySeq) + + if (match != null) { + return match + } + } + + const exact = pick( + conversations.filter((c) => c.title.toLocaleLowerCase() === normalized) + ) + if (exact != null) { + return exact + } + + return pick( + conversations.filter((c) => + c.title.toLocaleLowerCase().includes(normalized) + ) + ) +} diff --git a/packages/core/tests/helpers.ts b/packages/core/tests/helpers.ts index 148434cc9..21e55e732 100644 --- a/packages/core/tests/helpers.ts +++ b/packages/core/tests/helpers.ts @@ -356,7 +356,8 @@ export async function createService( const service = new ConversationService( ctx, createConfig(options.config), - ctx.chatluna.conversationRuntime + ctx.chatluna.conversationRuntime, + ctx.chatluna.platform ) return { diff --git a/packages/extension-agent/package.json b/packages/extension-agent/package.json index cb5d290bb..de1e39942 100644 --- a/packages/extension-agent/package.json +++ b/packages/extension-agent/package.json @@ -89,7 +89,7 @@ "peerDependencies": { "@koishijs/plugin-console": "^5.30.11", "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15", + "koishi-plugin-chatluna": "^1.4.0-alpha.16", "koishi-plugin-chatluna-storage-service": "^1.0.6" }, "peerDependenciesMeta": { diff --git a/packages/extension-agent/resources/skills/sub-agent-creator/SKILL.md b/packages/extension-agent/resources/skills/sub-agent-creator/SKILL.md index f9b5bd7d8..95431c9b0 100644 --- a/packages/extension-agent/resources/skills/sub-agent-creator/SKILL.md +++ 
b/packages/extension-agent/resources/skills/sub-agent-creator/SKILL.md @@ -36,7 +36,8 @@ subagents` so the local ChatLuna path is updated. - Read-only agents usually need `file_read`, `glob`, and `grep`. - Focused coding agents usually need `file_read`, `file_edit`, `file_write`, `glob`, `grep`, and sometimes `bash`. - - Web research agents may need `web_search` and `web_browser`. + - Web research agents may need `browser_open`, `web_search`, + `browser_read_text`, and `browser_summarize`. - Keep computer access denied unless the task truly needs desktop control. 3. Choose model and turn budget. @@ -158,7 +159,8 @@ way to restrict behavior. Use the right names: - `permissions.tools`: Tool names such as `file_read`, `file_edit`, - `file_write`, `glob`, `grep`, `bash`, `web_search`, `web_browser`. + `file_write`, `glob`, `grep`, `bash`, `web_search`, `browser_read_text`, + and `browser_summarize`. - `permissions.skills`: Skill names. - `permissions.mcp`: MCP server names. - `permissions.computer`: Backend names such as `local`, `e2b`, and @@ -235,7 +237,7 @@ permissions: deny: [] tools: mode: allow - allow: [web_search, web_browser] + allow: [web_search, browser_open, browser_read_text, browser_summarize] deny: [] computer: mode: allow diff --git a/packages/extension-agent/src/config/defaults.ts b/packages/extension-agent/src/config/defaults.ts index 964fe96a3..993763194 100644 --- a/packages/extension-agent/src/config/defaults.ts +++ b/packages/extension-agent/src/config/defaults.ts @@ -164,10 +164,241 @@ export function createDefaultToolConfig(): ToolConfig { characterScope: 'all' } }), - web_browser: createToolMetaOverride({ + browser_open: createToolMetaOverride({ source: 'extension', - group: 'search', - tags: ['search', 'web', 'browser'], + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_list_pages: createToolMetaOverride({ + source: 'extension', 
+ group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_select_page: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_close_page: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_navigate: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_read_text: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_get_html: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_get_links: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_summarize: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_snapshot: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_wait_for: 
createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_screenshot: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'debug'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_click: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_hover: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_fill: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_fill_form: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_type: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_press_key: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_upload_file: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'input'], + defaultAvailability: { + enabled: true, + 
main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_evaluate: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'debug'], + defaultAvailability: { + enabled: false, + main: false, + chatluna: false, + characterScope: 'none' + } + }), + browser_console: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'debug'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' + } + }), + browser_network: createToolMetaOverride({ + source: 'extension', + group: 'browser', + tags: ['browser', 'web', 'debug'], defaultAvailability: { enabled: true, main: true, diff --git a/packages/extension-agent/src/sub-agent/parse.ts b/packages/extension-agent/src/sub-agent/parse.ts index c26087f31..e919ba31e 100644 --- a/packages/extension-agent/src/sub-agent/parse.ts +++ b/packages/extension-agent/src/sub-agent/parse.ts @@ -324,7 +324,14 @@ function applyOpencodeFrontmatter( applyPermissionMode(item.bash, ['bash'], permissions.tools) applyPermissionMode( item.webfetch, - ['web_search', 'web_browser'], + [ + 'web_search', + 'browser_open', + 'browser_read_text', + 'browser_get_html', + 'browser_get_links', + 'browser_summarize' + ], permissions.tools ) applyPermissionMode(item.task, ['task'], permissions.tools) @@ -452,7 +459,16 @@ function mapCompatToolName(name: string) { if (lower === 'bash') return ['bash'] if (lower === 'grep') return ['grep'] if (lower === 'glob') return ['glob'] - if (lower === 'webfetch') return ['web_search', 'web_browser'] + if (lower === 'webfetch') { + return [ + 'web_search', + 'browser_open', + 'browser_read_text', + 'browser_get_html', + 'browser_get_links', + 'browser_summarize' + ] + } if (lower === 'task' || lower === 'agent' || lower.startsWith('agent(')) { return ['task'] } diff --git a/packages/extension-long-memory/package.json b/packages/extension-long-memory/package.json index 53817ef74..b8f9d1ab0 
100644 --- a/packages/extension-long-memory/package.json +++ b/packages/extension-long-memory/package.json @@ -62,7 +62,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "resolutions": { "@langchain/core": "^0.3.80", diff --git a/packages/extension-tools/package.json b/packages/extension-tools/package.json index 27b8b4c2f..3ab1051fe 100644 --- a/packages/extension-tools/package.json +++ b/packages/extension-tools/package.json @@ -71,7 +71,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15", + "koishi-plugin-chatluna": "^1.4.0-alpha.16", "koishi-plugin-chatluna-agent": "^1.0.28", "koishi-plugin-chatluna-storage-service": "^1.0.6" }, diff --git a/packages/extension-variable/package.json b/packages/extension-variable/package.json index 140e0ffbe..4d173713c 100644 --- a/packages/extension-variable/package.json +++ b/packages/extension-variable/package.json @@ -58,7 +58,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "resolutions": { "@langchain/core": "^0.3.80", diff --git a/packages/renderer-image/package.json b/packages/renderer-image/package.json index d1d9dc7d6..e4225142c 100644 --- a/packages/renderer-image/package.json +++ b/packages/renderer-image/package.json @@ -62,7 +62,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "koishi": { "description": { diff --git a/packages/service-embeddings/package.json b/packages/service-embeddings/package.json index 33d40b60b..f6c73e02c 100644 --- a/packages/service-embeddings/package.json +++ b/packages/service-embeddings/package.json @@ -63,7 +63,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, 
"koishi": { "description": { diff --git a/packages/service-multimodal/package.json b/packages/service-multimodal/package.json index b079c6955..98586c1f9 100644 --- a/packages/service-multimodal/package.json +++ b/packages/service-multimodal/package.json @@ -61,7 +61,7 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15", + "koishi-plugin-chatluna": "^1.4.0-alpha.16", "koishi-plugin-ffmpeg-path": "^2.0.0" }, "peerDependenciesMeta": { diff --git a/packages/service-search/package.json b/packages/service-search/package.json index c934cc805..80cca47f6 100644 --- a/packages/service-search/package.json +++ b/packages/service-search/package.json @@ -70,11 +70,18 @@ "devDependencies": { "atsc": "^2.1.0", "koishi": "^4.18.9", + "koishi-plugin-chatluna-agent": "^1.0.28", "koishi-plugin-puppeteer": "^3.9.0" }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16", + "koishi-plugin-chatluna-agent": "^1.0.28" + }, + "peerDependenciesMeta": { + "koishi-plugin-chatluna-agent": { + "optional": true + } }, "koishi": { "description": { diff --git a/packages/service-search/src/chain/browsing_chain.ts b/packages/service-search/src/chain/browsing_chain.ts index 6e9bfdeb3..1f05abaaa 100644 --- a/packages/service-search/src/chain/browsing_chain.ts +++ b/packages/service-search/src/chain/browsing_chain.ts @@ -23,12 +23,14 @@ import { PresetTemplate } from 'koishi-plugin-chatluna/llm-core/prompt' import { ChatLunaChatPrompt } from 'koishi-plugin-chatluna/llm-core/chain/prompt' -import { ChatLunaTool } from 'koishi-plugin-chatluna/llm-core/platform/types' +import { + ChatLunaTool, + ChatLunaToolRunnable +} from 'koishi-plugin-chatluna/llm-core/platform/types' import { applyToolMask, ToolMask } from 'koishi-plugin-chatluna/llm-core/agent' import { Session } from 'koishi' import { SearchAction, SummaryType } from '../types' -import { attemptToFixJSON, preprocessContent } 
from '../utils/parse' -import { PuppeteerBrowserTool } from '../tools/puppeteerBrowserTool' +import { parseSearchAction } from '../utils/parse' import { ChatLunaError, ChatLunaErrorCode @@ -36,6 +38,7 @@ import { import { getMessageContent } from 'koishi-plugin-chatluna/utils/string' import { ChatLunaPromptRenderService } from 'koishi-plugin-chatluna/services/chat' import { ComputedRef, Ref } from 'koishi-plugin-chatluna' +import { BrowserManager } from '../tools/browser/manager' // github.com/langchain-ai/weblangchain/blob/main/nextjs/app/api/chat/stream_log/route.ts#L81 @@ -56,6 +59,7 @@ export interface ChatLunaBrowsingChainInput { contextualCompressionPrompt?: string searchFailedPrompt: string variableService: ChatLunaPromptRenderService + browserManager: BrowserManager } export class ChatLunaBrowsingChain @@ -86,7 +90,7 @@ export class ChatLunaBrowsingChain summaryModel: Ref - contextualCompressionPrompt: string + contextualCompressionPrompt?: string variableService: ChatLunaPromptRenderService @@ -96,6 +100,8 @@ export class ChatLunaBrowsingChain searchFailedPrompt: string + browserManager: BrowserManager + private _toolMask?: ToolMask constructor({ @@ -109,7 +115,12 @@ export class ChatLunaBrowsingChain summaryType, thoughtMessage, searchPrompt, + preset, + newQuestionPrompt, + variableService, + browserManager, summaryModel, + contextualCompressionPrompt, contextualCompressionChain }: ChatLunaBrowsingChainInput & { chain: ChatLunaLLMChain @@ -120,17 +131,21 @@ export class ChatLunaBrowsingChain }) { super() this.botName = botName + this.preset = preset this.embeddings = embeddings this.summaryType = summaryType - // use memory - this.formatQuestionChain = formatQuestionChain this.historyMemory = historyMemory this.thoughtMessage = thoughtMessage this.searchFailedPrompt = searchFailedPrompt + this.newQuestionPrompt = newQuestionPrompt + this.variableService = variableService + this.browserManager = browserManager + this.searchPrompt = searchPrompt + 
this.contextualCompressionPrompt = contextualCompressionPrompt this.responsePrompt = PromptTemplate.fromTemplate(searchPrompt) this.chain = chain @@ -156,6 +171,7 @@ export class ChatLunaBrowsingChain searchFailedPrompt, variableService, contextManager, + browserManager, contextualCompressionPrompt }: ChatLunaBrowsingChainInput & { contextManager: ChatLunaContextManagerService @@ -188,6 +204,7 @@ export class ChatLunaBrowsingChain return new ChatLunaBrowsingChain({ variableService, + browserManager, botName, formatQuestionChain, embeddings, @@ -201,11 +218,14 @@ export class ChatLunaBrowsingChain chain, tools, summaryType, + contextualCompressionPrompt, contextualCompressionChain }) } - private async _selectTool(name: string): Promise { + private async _selectTool( + name: string + ): Promise { const chatLunaTool = this.tools.value.find( (tool) => tool.name === name && applyToolMask(name, this._toolMask) ) @@ -216,7 +236,7 @@ export class ChatLunaBrowsingChain return chatLunaTool.tool.createTool({ embeddings: this.embeddings - }) + }) as T } async call({ @@ -257,8 +277,8 @@ export class ChatLunaBrowsingChain chat_history: formatChatHistoryAsString( chatHistory.slice(-6) ), - time: new Date().toLocaleString(), - question: message.content, + time: new Date().toISOString(), + question: getMessageContent(message.content), temperature: 0, signal }, @@ -268,7 +288,7 @@ export class ChatLunaBrowsingChain ) )['text'] as string - const searchAction = this.parseSearchAction(newQuestion) + const searchAction = parseSearchAction(newQuestion) logger?.debug(`action: ${JSON.stringify(searchAction)}`) @@ -281,6 +301,7 @@ export class ChatLunaBrowsingChain chatHistory, session, events, + conversationId, signal ) } @@ -320,55 +341,15 @@ export class ChatLunaBrowsingChain } } - private parseSearchAction(action: string): SearchAction { - action = preprocessContent(action) - - try { - return JSON.parse(action) as SearchAction - } catch (e) { - action = attemptToFixJSON(action) - - try 
{ - return JSON.parse(action) as SearchAction - } catch (e) { - logger?.error(`parse search action failed: ${e}`) - } - } - - if (action.includes('[skip]')) { - return { - action: 'skip', - thought: 'skip the search' - } - } - - return { - action: 'search', - thought: action, - content: [action] - } - } - private async _search( action: SearchAction, message: HumanMessage, chatHistory: BaseMessage[], session: Session, events: ChatLunaLLMCallArg['events'], + conversationId: string, signal: AbortSignal ) { - const searchTool = await this._selectTool('web_search') - - const webBrowserTool = (await this._selectTool( - 'web_browser' - )) as PuppeteerBrowserTool - - const searchResults: { - title: string - description: string - url: string - }[] = [] - if (!Array.isArray(action.content)) { logger?.error( `search action content is not an array: ${JSON.stringify(action)}` @@ -382,169 +363,169 @@ export class ChatLunaBrowsingChain ) } - const searchByQuestion = async ( - question: string, - signal: AbortSignal - ) => { - // Use the rephrased question for search - const rawSearchResults = await Promise.race([ - searchTool - .invoke(question, { - configurable: { - model: this.model - } - }) - .then((text) => text as string), - new Promise((resolve, reject) => { - signal?.addEventListener('abort', (event) => { - reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) - }) - }) - ]) - - const parsedSearchResults = - (JSON.parse(rawSearchResults) as unknown as { - title: string - description: string - url: string - }[]) ?? [] - - if (this.thoughtMessage) { - await session.send( - `Find ${parsedSearchResults.length} search results about ${question}.` - ) - } + const results = + action.action === 'url' + ? 
await this._browseUrls( + action.content, + session, + conversationId, + signal + ) + : await this._searchQuestions( + action.content, + session, + conversationId, + signal + ) - searchResults.push(...parsedSearchResults) - } + return await this._appendSearchPrompt( + action, + message, + chatHistory, + results, + events, + signal + ) + } - const searchByUrl = async (url: string, signal: AbortSignal) => { - const text = await Promise.race([ - webBrowserTool - .invoke( - { - action: 'text', - url - }, - { + private async _searchQuestions( + questions: string[], + session: Session, + conversationId: string, + signal: AbortSignal + ) { + const tool = await this._selectTool('web_search') + const results = await raceAbort( + Promise.allSettled( + questions.map(async (question) => { + const raw = await tool + .invoke(question, { configurable: { - model: this.model + model: this.model, + session, + conversationId } - } - ) - .then((text) => text as string), - new Promise((resolve, reject) => { - signal?.addEventListener('abort', (event) => { - reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) - }) - }) - ]) - - if (this.thoughtMessage) { - await session.send(`Open ${url} and read the content.`) - } + }) + .then((text) => text as string) + const parsed = JSON.parse(raw) as SearchResultLike[] - searchResults.push({ - title: url, - description: text, - url - }) - } + if (this.thoughtMessage) { + await session.send( + `Find ${parsed.length} search results about ${question}.` + ) + } - if (action.action === 'url') { - await Promise.race([ - Promise.all( - action.content.map((url) => searchByUrl(url, signal)) - ), - new Promise((resolve, reject) => { - signal?.addEventListener('abort', (event) => { - reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) - }) - }) - ]) - } else if (action.action === 'search') { - await Promise.race([ - Promise.all( - action.content.map((question) => - searchByQuestion(question, signal) - ) - ), - new Promise((resolve, reject) => { - 
signal?.addEventListener('abort', (event) => { - reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) - }) + return parsed }) - ]) - } - - // format questions + ), + signal + ) - const formattedSearchResults = searchResults.map((result) => { - // sort like json style - // title: xx, xx: xx like - let resultString = '' + return results.flatMap((result) => + result.status === 'fulfilled' ? result.value : [] + ) + } - for (const key in result) { - resultString += `${key}: ${result[key]}, ` + private async _browseUrls( + urls: string[], + session: Session, + conversationId: string, + signal: AbortSignal + ) { + const runConfig = { + configurable: { + model: this.model, + session, + conversationId } + } as ChatLunaToolRunnable + + const results = await raceAbort( + Promise.allSettled( + urls.map(async (url) => { + const text = await this.browserManager.readText( + { url }, + runConfig + ) - resultString = resultString.slice(0, -2) + if (this.thoughtMessage) { + await session.send(`Open ${url} and read the content.`) + } - return resultString - }) + return { + title: url, + description: text, + url + } + }) + ), + signal + ) - let responsePrompt = '' - if (formattedSearchResults?.length > 0) { - let formattedSearchResult = formattedSearchResults.join('\n\n') - - if (this.contextualCompressionChain) { - try { - formattedSearchResult = ( - await callChatLunaChain( - this.contextualCompressionChain, - { - action: JSON.stringify(action), - context: formattedSearchResult, - temperature: 0, - signal - }, - { - 'llm-used-token-count': - events['llm-used-token-count'] - } + return results.flatMap((result) => + result.status === 'fulfilled' ? 
[result.value] : [] + ) + } + + private async _appendSearchPrompt( + action: SearchAction, + message: HumanMessage, + chatHistory: BaseMessage[], + results: SearchResultLike[], + events: ChatLunaLLMCallArg['events'], + signal: AbortSignal + ) { + let context = formatSearchResults(results) + + if (context.length < 1) { + if (this.searchFailedPrompt?.length > 0) { + chatHistory.push( + new SystemMessage( + this.searchFailedPrompt.replaceAll( + '{question}', + getMessageContent(message.content) ) - )['text'] as string - } catch (e) { - logger?.error(`contextual compression failed: ${e}`) - } + ) + ) } + return '' + } - responsePrompt = await this.responsePrompt.format({ - question: message.content, - context: formattedSearchResult - }) - - chatHistory.push(new SystemMessage(responsePrompt)) - - chatHistory.push( - new AIMessage( - "OK. I understand. I will respond to the your's question using the same language as your input. What's the your's question?" - ) - ) - } else if (this.searchFailedPrompt?.length > 0) { - chatHistory.push( - new SystemMessage( - this.searchFailedPrompt.replaceAll( - '{question}', - getMessageContent(message.content) + if (this.contextualCompressionChain) { + try { + context = ( + await callChatLunaChain( + this.contextualCompressionChain, + { + action: JSON.stringify(action), + context, + temperature: 0, + signal + }, + { + 'llm-used-token-count': + events['llm-used-token-count'] + } ) - ) - ) + )['text'] as string + } catch (e) { + logger?.error(`contextual compression failed: ${e}`) + } } - await webBrowserTool.closeBrowser() + const prompt = await this.responsePrompt.format({ + question: getMessageContent(message.content), + context + }) - return responsePrompt + chatHistory.push(new SystemMessage(prompt)) + chatHistory.push( + new AIMessage( + "OK. I understand. I will respond to your question using the same language as your input. What's your question?" 
+ ) + ) + + return prompt } get model() { @@ -562,3 +543,35 @@ interface ChatLunaToolWrapper { name: string tool: ChatLunaTool } + +interface SearchResultLike { + title: string + description: string + url: string +} + +function formatSearchResults(results: SearchResultLike[]) { + return results + .map((result) => + Object.entries(result) + .map(([key, value]) => `${key}: ${value}`) + .join(', ') + ) + .join('\n\n') +} + +function raceAbort(promise: Promise, signal: AbortSignal) { + if (signal?.aborted) { + return Promise.reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) + } + + return new Promise((resolve, reject) => { + const onAbort = () => + reject(new ChatLunaError(ChatLunaErrorCode.ABORTED)) + + signal?.addEventListener('abort', onAbort, { once: true }) + promise.then(resolve, reject).finally(() => { + signal?.removeEventListener('abort', onAbort) + }) + }) +} diff --git a/packages/service-search/src/config.ts b/packages/service-search/src/config.ts index aa00bdd51..345208152 100644 --- a/packages/service-search/src/config.ts +++ b/packages/service-search/src/config.ts @@ -1,7 +1,232 @@ -import { Context } from 'koishi' -import { Config } from '.' 
+/* eslint-disable max-len */ +import { Context, Schema } from 'koishi' +import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' import { modelSchema } from 'koishi-plugin-chatluna/utils/schema' +import { SummaryType } from './types' export async function apply(ctx: Context, _config: Config) { modelSchema(ctx) } + +export interface Config extends ChatLunaPlugin.Config { + searchEngine: string[] + topK: number + summaryType: SummaryType + summaryModel: string + multiSourceMode: 'average' | 'total' + searchFailedPrompt: string + + serperApiKey: string + serperCountry: string + serperLocation: string + serperSearchResults: number + + bingSearchApiKey: string + bingSearchLocation: string + azureLocation: string + + wikipediaBaseURL: string[] + maxWikipediaDocContentLength: number + + tavilyApiKey: string + + searxngBaseURL: string + + browserTimeout: number + browserIdleTimeout: number + browserMaxPages: number + browserOutputLimit: number + + searchPrompt: string + newQuestionPrompt: string + searchThreshold: number + contextualCompression: boolean + contextualCompressionPrompt: string +} + +export const Config: Schema = Schema.intersect([ + ChatLunaPlugin.Config, + + Schema.object({ + searchEngine: Schema.array( + Schema.union([ + Schema.const('bing-web').description('Bing (Web)'), + Schema.const('bing-api').description('Bing (API)'), + Schema.const('duckduckgo-lite').description( + 'DuckDuckGo (Lite)' + ), + Schema.const('serper').description('Serper (Google)'), + Schema.const('tavily').description('Tavily (API)'), + Schema.const('google-web').description('Google (Web)'), + Schema.const('wikipedia').description('Wikipedia'), + Schema.const('searxng').description('SearxNG') + ]) + ) + .default(['bing-web']) + .role('select'), + topK: Schema.number().min(2).max(50).step(1).default(5), + browserTimeout: Schema.number().default(60000), + browserIdleTimeout: Schema.number().default(300000), + browserMaxPages: Schema.number().min(1).max(20).default(6), + 
browserOutputLimit: Schema.number().min(1000).default(12000), + summaryType: Schema.union([ + Schema.const('speed'), + Schema.const('balanced'), + Schema.const('quality') + ]).default('speed') as Schema, + multiSourceMode: Schema.union([ + Schema.const('average'), + Schema.const('total') + ]).default('average') as Schema, + summaryModel: Schema.dynamic('model').default('empty'), + + searchThreshold: Schema.percent().step(0.01).default(0.25), + contextualCompression: Schema.boolean().default(false) + }), + + Schema.object({ + serperApiKey: Schema.string().role('secret'), + serperCountry: Schema.string().default('cn'), + serperLocation: Schema.string().default('zh-cn'), + serperSearchResults: Schema.number().min(2).max(20).default(10) + }), + + Schema.object({ + bingSearchApiKey: Schema.string().role('secret'), + bingSearchLocation: Schema.string().default('zh-CN'), + azureLocation: Schema.string().default('global') + }), + + Schema.object({ + tavilyApiKey: Schema.string().role('secret') + }), + + Schema.object({ + wikipediaBaseURL: Schema.array(Schema.string()).default([ + 'https://en.wikipedia.org/w/api.php' + ]), + maxWikipediaDocContentLength: Schema.number().default(5000) + }), + + Schema.object({ + searxngBaseURL: Schema.string().default('https://paulgo.io') + }), + + Schema.object({ + searchPrompt: Schema.string() + .role('textarea') + .default( + `Based on the search results, generate a detailed response with proper citations: + +1. Main Content: + - Present information in well-organized sections + - Include specific details, data, and technical terms + - Keep original language and terminology + - Mark each key fact with a citation [^1] + - For multiple sources, use sequential citations [^1][^2] + +2. Media Content: + - Include images when available: ![description](image_url)[^3] + - Format tables and structured data properly + +3. 
Organization: + - Use clear section headings + - Present information in logical order + - Include bullet points for clarity + - Highlight important quotes with proper attribution + +Context: + + {context} + + +Output with citation References: +[^1]: [title](url) +[^2]: [title](url) +... + +Output Language need same as user input language.` + ), + newQuestionPrompt: Schema.string() + .role('textarea') + .default( + `Analyze the follow-up question and return a JSON response based on the given conversation context. + +Rules: +- CRITICAL: Use the exact same language as the input. Do not translate or change the language under any circumstances. +- Make the question self-contained and clear +- Optimize for search engine queries with time-sensitivity in mind +- Consider the current time: {time} when need formulating search queries +- ALWAYS generate 2-3 different search keywords/phrases for multi-source verification +- Do not add any explanations or additional content +- Base your response on a comprehensive analysis of the chat history +- Return your response in the following JSON format ONLY: + {{ + "thought": "your reasoning about what to do with user input. Use the text language as the input", + "action": "skip" | "search" | "url", + "content": ["string1", "string2", ...] (optional array of strings) + }} + +Action types explanation: +1. "skip" - Use when the question doesn't require an internet search (e.g., personal opinions, simple calculations, or information already provided in the chat history) + Example: {{ "thought": "This is asking for a personal opinion which doesn't require search", "action": "skip" }} + +2. "search" - Use when you need to generate search-engine-friendly questions + Example: For "What's the weather like in Tokyo and New York?" 
+ {{ "thought": "This requires checking current weather in two different cities as of {time}", "action": "search", "content": ["Current latest weather in Tokyo {time}", "Current latest weather in New York {time}", "Tokyo weather forecast today", "New York weather forecast today"] }} + +3. "url" - Use when the message contains one or more URLs that should be browsed + Example: For "Can you summarize the information from https://example.com/article and https://example.org/data?" + {{ "thought": "This requires browsing two specific URLs to gather information", "action": "url", "content": ["https://example.com/article", "https://example.org/data"] }} + +IMPORTANT: +- Your JSON response MUST be in the same language as the follow up input. This is crucial for maintaining context and accuracy. +- For time-sensitive queries (news, weather, events, etc.), ALWAYS include the current time {time} in your search queries. +- ALWAYS generate multiple (2-3) search queries for better coverage and verification from different sources. + +Chat History: +{chat_history} +Current Time: {time} +Follow-up Input: {question} +JSON Response:` + ), + searchFailedPrompt: Schema.string() + .role('textarea') + .default( + `For query "{question}" with no search results: + +1. Inform user about no results found +2. Offer base knowledge assistance with clear limitations: + - Based on training data, not current info + - May be outdated for time-sensitive topics + - No recent developments included + +Use same language as query. Suggest alternative search terms if possible.` + ), + contextualCompressionPrompt: Schema.string().role('textarea') + .default(`Summarize the context based on the search action. Format in Markdown with citations. Return 'empty' if nothing relevant found. 
+ +Context: + + {context} + + +Action: +{action} + +Output: +--- +{{First paragraph as overview with citations[^1]}} + +{{2-5 detail paragraphs with supporting information and citations[^2][^3]}} + +## References +[^1]: [title1](url1) +[^2]: [title2](url2) +[^3]: [title3](url3) +---`) + }) +]).i18n({ + 'zh-CN': require('./locales/zh-CN.schema.yml'), + 'en-US': require('./locales/en-US.schema.yml') +}) diff --git a/packages/service-search/src/index.ts b/packages/service-search/src/index.ts index e5c5bc767..fadf00944 100644 --- a/packages/service-search/src/index.ts +++ b/packages/service-search/src/index.ts @@ -1,22 +1,22 @@ /* eslint-disable max-len */ /* eslint-disable @typescript-eslint/naming-convention */ -import { Context, Logger, Schema, Time } from 'koishi' +import { Context, Logger } from 'koishi' import { ClientConfig } from 'koishi-plugin-chatluna/llm-core/platform/config' import { PlatformService } from 'koishi-plugin-chatluna/llm-core/platform/service' import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' import { createLogger } from 'koishi-plugin-chatluna/utils/logger' import { ChatLunaBrowsingChain } from './chain/browsing_chain' -import { - PUPPETEER_BROWSER_TOOL_DESCRIPTION, - PuppeteerBrowserTool -} from './tools/puppeteerBrowserTool' -import { apply as configApply } from './config' +import { Config, apply as configApply } from './config' import { parseRawModelName } from 'koishi-plugin-chatluna/llm-core/utils/count_tokens' import { SearchManager } from './provide' import { providerPlugin } from './plugin' import { SEARCH_TOOL_DESCRIPTION, SearchTool } from './tools/search' import { SummaryType } from './types' import { computed } from 'koishi-plugin-chatluna' +import { BrowserManager } from './tools/browser/manager' +import { registerBrowserTools } from './tools/browser/tools' + +export { Config } from './config' export let logger: Logger @@ -37,131 +37,95 @@ export function apply(ctx: Context, config: Config) { ) const 
searchManager = new SearchManager(ctx, config) - - providerPlugin(ctx, config, plugin, searchManager) - - plugin.registerTool('web_search', { - description: SEARCH_TOOL_DESCRIPTION, - createTool(params) { - const summaryType: SummaryType = - params['summaryType'] ?? config.summaryType - - const browserModelRef = computed( - () => keywordExtractModel?.value ?? null - ) - - const browserTool = new PuppeteerBrowserTool( - ctx, - browserModelRef, - params.embeddings, - { - waitUntil: - summaryType !== SummaryType.Quality - ? 'domcontentloaded' - : 'networkidle2', - timeout: - summaryType !== SummaryType.Quality - ? 6 * Time.second - : 30 * Time.second, - idleTimeout: 3 * Time.minute + const browserManager = new BrowserManager(ctx, config) + const summaryModel = computed(() => keywordExtractModel?.value) + + registerBrowserTools(plugin, browserManager, summaryModel) + + if (config.searchEngine.length > 0) { + await providerPlugin(ctx, config, plugin, searchManager) + + plugin.registerTool('web_search', { + description: SEARCH_TOOL_DESCRIPTION, + createTool(params) { + const summaryType: SummaryType = + params['summaryType'] ?? config.summaryType + + const browserModelRef = computed( + () => keywordExtractModel?.value ?? 
null + ) + return new SearchTool( + searchManager, + browserManager, + params.embeddings, + browserModelRef, + summaryType + ) + }, + selector() { + return true + }, + meta: { + source: 'extension', + group: 'search', + tags: ['search', 'web'], + defaultAvailability: { + enabled: true, + main: true, + chatluna: true, + characterScope: 'all' } - ) - return new SearchTool( - searchManager, - browserTool, - params.embeddings, - browserModelRef, - summaryType - ) - }, - selector() { - return true - }, - meta: { - source: 'extension', - group: 'search', - tags: ['search', 'web'], - defaultAvailability: { - enabled: true, - main: true, - chatluna: true, - characterScope: 'all' } - } - }) - - plugin.registerTool('web_browser', { - description: PUPPETEER_BROWSER_TOOL_DESCRIPTION, - createTool(params) { - const summaryModel = computed(() => keywordExtractModel?.value) - - return new PuppeteerBrowserTool( - ctx, - summaryModel, - params.embeddings - ) - }, - selector() { - return true - }, - meta: { - source: 'extension', - group: 'search', - tags: ['search', 'web', 'browser'], - defaultAvailability: { - enabled: true, - main: true, - chatluna: true, - characterScope: 'all' - } - } - }) - - plugin.registerChatChainProvider( - 'browsing', - { - 'zh-CN': '浏览模式,可以从外部获取信息', - 'en-US': 'Browsing mode, can get information from web' - }, - (params) => { - const tools = getTools( - ctx.chatluna.platform, - (name) => - name === 'web_search' || - name === 'web_browser' || - name === 'puppeteer_browser' - ) - - const summaryModel = computed( - () => keywordExtractModel?.value ?? 
params.model - ) + }) + } + + if (config.searchEngine.length > 0) { + plugin.registerChatChainProvider( + 'browsing', + { + 'zh-CN': '浏览模式,可以从外部获取信息', + 'en-US': 'Browsing mode, can get information from web' + }, + (params) => { + const tools = getTools( + ctx.chatluna.platform, + (name) => + name === 'web_search' || name.startsWith('browser_') + ) + + const summaryModel = computed( + () => keywordExtractModel?.value ?? params.model + ) + + const model = params.model + const options = { + preset: params.preset, + botName: params.botName, + embeddings: params.embeddings, + historyMemory: params.historyMemory, + summaryType: config.summaryType, + summaryModel, + thoughtMessage: ctx.chatluna.config.showThoughtMessage, + searchPrompt: config.searchPrompt, + newQuestionPrompt: config.newQuestionPrompt, + contextualCompressionPrompt: + config.contextualCompression + ? config.contextualCompressionPrompt + : undefined, + searchFailedPrompt: config.searchFailedPrompt, + variableService: ctx.chatluna.promptRenderer, + contextManager: ctx.chatluna.contextManager, + browserManager + } - const model = params.model - const options = { - preset: params.preset, - botName: params.botName, - embeddings: params.embeddings, - historyMemory: params.historyMemory, - summaryType: config.summaryType, - summaryModel, - thoughtMessage: ctx.chatluna.config.showThoughtMessage, - searchPrompt: config.searchPrompt, - newQuestionPrompt: config.newQuestionPrompt, - contextualCompressionPrompt: config.contextualCompression - ? 
config.contextualCompressionPrompt - : undefined, - searchFailedPrompt: config.searchFailedPrompt, - variableService: ctx.chatluna.promptRenderer, - contextManager: ctx.chatluna.contextManager + return ChatLunaBrowsingChain.fromLLMAndTools( + model, + tools, + options + ) } - - return ChatLunaBrowsingChain.fromLLMAndTools( - model, - tools, - options - ) - } - ) + ) + } }) configApply(ctx, config) @@ -189,225 +153,9 @@ export async function createModel(ctx: Context, model: string) { return ctx.chatluna.createChatModel(platform, modelName) } -export interface Config extends ChatLunaPlugin.Config { - searchEngine: string[] - topK: number - summaryType: SummaryType - summaryModel: string - multiSourceMode: 'average' | 'total' - searchFailedPrompt: string - - serperApiKey: string - serperCountry: string - serperLocation: string - serperSearchResults: number - - bingSearchApiKey: string - bingSearchLocation: string - azureLocation: string - - wikipediaBaseURL: string[] - maxWikipediaDocContentLength: number - - tavilyApiKey: string - - searxngBaseURL: string - - puppeteerTimeout: number - puppeteerIdleTimeout: number - - searchPrompt: string - newQuestionPrompt: string - searchThreshold: number - contextualCompression: boolean - contextualCompressionPrompt: string +export const inject = { + required: ['chatluna', 'puppeteer'], + optional: ['chatluna_agent'] } -export const Config: Schema = Schema.intersect([ - ChatLunaPlugin.Config, - - Schema.object({ - searchEngine: Schema.array( - Schema.union([ - Schema.const('bing-web').description('Bing (Web)'), - Schema.const('bing-api').description('Bing (API)'), - Schema.const('duckduckgo-lite').description( - 'DuckDuckGo (Lite)' - ), - Schema.const('serper').description('Serper (Google)'), - Schema.const('tavily').description('Tavily (API)'), - Schema.const('google-web').description('Google (Web)'), - Schema.const('wikipedia').description('Wikipedia'), - Schema.const('searxng').description('SearxNG') - ]) - ) - 
.default(['bing-web']) - .role('select'), - topK: Schema.number().min(2).max(50).step(1).default(5), - puppeteerTimeout: Schema.number().default(60000), - puppeteerIdleTimeout: Schema.number().default(300000), - summaryType: Schema.union([ - Schema.const('speed'), - Schema.const('balanced'), - Schema.const('quality') - ]).default('speed') as Schema, - multiSourceMode: Schema.union([ - Schema.const('average'), - Schema.const('total') - ]).default('average') as Schema, - summaryModel: Schema.dynamic('model').default('empty'), - - searchThreshold: Schema.percent().step(0.01).default(0.25), - contextualCompression: Schema.boolean().default(false) - }), - - Schema.object({ - serperApiKey: Schema.string().role('secret'), - serperCountry: Schema.string().default('cn'), - serperLocation: Schema.string().default('zh-cn'), - serperSearchResults: Schema.number().min(2).max(20).default(10) - }), - - Schema.object({ - bingSearchApiKey: Schema.string().role('secret'), - bingSearchLocation: Schema.string().default('zh-CN'), - azureLocation: Schema.string().default('global') - }), - - Schema.object({ - tavilyApiKey: Schema.string().role('secret') - }), - - Schema.object({ - wikipediaBaseURL: Schema.array(Schema.string()).default([ - 'https://en.wikipedia.org/w/api.php' - ]), - maxWikipediaDocContentLength: Schema.number().default(5000) - }), - - Schema.object({ - searxngBaseURL: Schema.string().default('https://paulgo.io') - }), - - Schema.object({ - searchPrompt: Schema.string() - .role('textarea') - .default( - `Based on the search results, generate a detailed response with proper citations: - -1. Main Content: - - Present information in well-organized sections - - Include specific details, data, and technical terms - - Keep original language and terminology - - Mark each key fact with a citation [^1] - - For multiple sources, use sequential citations [^1][^2] - -2. 
Media Content: - - Include images when available: ![description](image_url)[^3] - - Format tables and structured data properly - -3. Organization: - - Use clear section headings - - Present information in logical order - - Include bullet points for clarity - - Highlight important quotes with proper attribution - -Context: - - {context} - - -Output with citation References: -[^1]: [title](url) -[^2]: [title](url) -... - -Output Language need same as user input language.` - ), - newQuestionPrompt: Schema.string() - .role('textarea') - .default( - `Analyze the follow-up question and return a JSON response based on the given conversation context. - -Rules: -- CRITICAL: Use the exact same language as the input. Do not translate or change the language under any circumstances. -- Make the question self-contained and clear -- Optimize for search engine queries with time-sensitivity in mind -- Consider the current time: {time} when need formulating search queries -- ALWAYS generate 2-3 different search keywords/phrases for multi-source verification -- Do not add any explanations or additional content -- Base your response on a comprehensive analysis of the chat history -- Return your response in the following JSON format ONLY: - {{ - "thought": "your reasoning about what to do with user input. Use the text language as the input", - "action": "skip" | "search" | "url", - "content": ["string1", "string2", ...] (optional array of strings) - }} - -Action types explanation: -1. "skip" - Use when the question doesn't require an internet search (e.g., personal opinions, simple calculations, or information already provided in the chat history) - Example: {{ "thought": "This is asking for a personal opinion which doesn't require search", "action": "skip" }} - -2. "search" - Use when you need to generate search-engine-friendly questions - Example: For "What's the weather like in Tokyo and New York?" 
- {{ "thought": "This requires checking current weather in two different cities as of {time}", "action": "search", "content": ["Current latest weather in Tokyo {time}", "Current latest weather in New York {time}", "Tokyo weather forecast today", "New York weather forecast today"] }} - -3. "url" - Use when the message contains one or more URLs that should be browsed - Example: For "Can you summarize the information from https://example.com/article and https://example.org/data?" - {{ "thought": "This requires browsing two specific URLs to gather information", "action": "url", "content": ["https://example.com/article", "https://example.org/data"] }} - -IMPORTANT: -- Your JSON response MUST be in the same language as the follow up input. This is crucial for maintaining context and accuracy. -- For time-sensitive queries (news, weather, events, etc.), ALWAYS include the current time {time} in your search queries. -- ALWAYS generate multiple (2-3) search queries for better coverage and verification from different sources. - -Chat History: -{chat_history} -Current Time: {time} -Follow-up Input: {question} -JSON Response:` - ), - searchFailedPrompt: Schema.string() - .role('textarea') - .default( - `For query "{question}" with no search results: - -1. Inform user about no results found -2. Offer base knowledge assistance with clear limitations: - - Based on training data, not current info - - May be outdated for time-sensitive topics - - No recent developments included - -Use same language as query. Suggest alternative search terms if possible.` - ), - contextualCompressionPrompt: Schema.string().role('textarea') - .default(`Summarize the context based on the search action. Format in Markdown with citations. Return 'empty' if nothing relevant found. 
- -Context: - - {context} - - -Action: -{action} - -Output: ---- -{{First paragraph as overview with citations[^1]}} - -{{2-5 detail paragraphs with supporting information and citations[^2][^3]}} - -## References -[^1]: [title1](url1) -[^2]: [title2](url2) -[^3]: [title3](url3) ----`) - }) -]).i18n({ - 'zh-CN': require('./locales/zh-CN.schema.yml'), - 'en-US': require('./locales/en-US.schema.yml') -}) - -export const inject = ['chatluna', 'puppeteer'] - export const name = 'chatluna-search-service' diff --git a/packages/service-search/src/locales/en-US.schema.yml b/packages/service-search/src/locales/en-US.schema.yml index 2b38d6964..d13370d61 100644 --- a/packages/service-search/src/locales/en-US.schema.yml +++ b/packages/service-search/src/locales/en-US.schema.yml @@ -6,8 +6,10 @@ $inner: topK: 'Number of reference results (2-50)' enhancedSummary: 'Enable enhanced summary' fastEnhancedSummary: 'Enable fast enhanced summary. This will not call the model and directly return the search results. (Requires a larger context model)' - puppeteerTimeout: 'Puppeteer operation timeout (ms)' - puppeteerIdleTimeout: 'Puppeteer idle timeout (ms)' + browserTimeout: 'Browser operation timeout (ms)' + browserIdleTimeout: 'Browser idle timeout (ms)' + browserMaxPages: 'Maximum browser pages per conversation' + browserOutputLimit: 'Maximum browser tool output length before saving to file' contextualCompression: 'Enable contextual compression. Will compress the search results based on the selected summary model.' summaryModel: 'Model used for generating summaries. Opting for a smaller and faster model can yield quicker responses.' 
summaryType: diff --git a/packages/service-search/src/locales/zh-CN.schema.yml b/packages/service-search/src/locales/zh-CN.schema.yml index be3787018..5c70e8e39 100644 --- a/packages/service-search/src/locales/zh-CN.schema.yml +++ b/packages/service-search/src/locales/zh-CN.schema.yml @@ -6,8 +6,10 @@ $inner: topK: 参考结果数量(2-50)。 enhancedSummary: 是否使用增强摘要。 fastEnhancedSummary: 是否使用快速增强摘要。开启后将不调用模型,直接返回搜索结果。(需要上下文较大的模型) - puppeteerTimeout: Puppeteer 操作超时时间(毫秒)。 - puppeteerIdleTimeout: Puppeteer 空闲超时时间(毫秒)。 + browserTimeout: 浏览器操作超时时间(毫秒)。 + browserIdleTimeout: 浏览器空闲超时时间(毫秒)。 + browserMaxPages: 每个会话最多保留的浏览器页面数量。 + browserOutputLimit: 浏览器工具输出保存到文件前的最大文本长度。 summaryModel: 处理摘要和关键词的模型。选择较小并且速度较快的模型可以获得更快的响应速度。 searchConfidenceThreshold: 搜索决策的置信阈值。较高的值意味着更可能进行搜索。 contextualCompression: 是否对搜索结果进行上下文压缩。将会基于选择的摘要模型对搜索结果进行压缩。 diff --git a/packages/service-search/src/plugin.ts b/packages/service-search/src/plugin.ts index 1556efa10..bd2c1d76e 100644 --- a/packages/service-search/src/plugin.ts +++ b/packages/service-search/src/plugin.ts @@ -1,5 +1,5 @@ import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' -import { Context } from 'vm' +import { Context } from 'koishi' import { Config } from '.' 
import { SearchManager } from './provide' // import start @@ -18,13 +18,6 @@ export async function providerPlugin( plugin: ChatLunaPlugin, manager: SearchManager ) { - type Plugin = ( - ctx: Context, - config: Config, - plugin: ChatLunaPlugin, - manager: SearchManager - ) => PromiseLike | void - const middlewares: Plugin[] = // middleware start [ @@ -42,3 +35,10 @@ export async function providerPlugin( await middleware(ctx, config, plugin, manager) } } + +type Plugin = ( + ctx: Context, + config: Config, + plugin: ChatLunaPlugin, + manager: SearchManager +) => PromiseLike | void diff --git a/packages/service-search/src/provide.ts b/packages/service-search/src/provide.ts index 9f0359b84..4002d9c93 100644 --- a/packages/service-search/src/provide.ts +++ b/packages/service-search/src/provide.ts @@ -65,6 +65,8 @@ export class SearchManager { ) : Array.from(this.providers.values()) + if (providers.length < 1) return [] + if (providers.length === 1) { // 一个源就不用分了,直接返回 try { @@ -82,7 +84,7 @@ export class SearchManager { const signalLimit = this.config.multiSourceMode === 'average' - ? Math.round(limit / providers.length) + ? 
Math.max(1, Math.round(limit / providers.length)) : limit const searchPromises = providers.map(async (provider) => { @@ -99,7 +101,7 @@ export class SearchManager { await Promise.all(searchPromises) - if (searchPromises.length > limit) { + if (searchResults.length > limit) { return this._reRankResults(query, searchResults, limit) } diff --git a/packages/service-search/src/providers/bing_api.ts b/packages/service-search/src/providers/bing_api.ts index 242dbd9d1..166853968 100644 --- a/packages/service-search/src/providers/bing_api.ts +++ b/packages/service-search/src/providers/bing_api.ts @@ -8,34 +8,24 @@ class BingAPISearchProvider extends SearchProvider { async search(query: string, limit = this.config.topK) { const searchUrl = new URL('https://api.bing.microsoft.com/v7.0/search') - const headers = { - 'Ocp-Apim-Subscription-Key': this.config.bingSearchApiKey, - 'Ocp-Apim-Subscription-Region': - this.config.azureLocation ?? 'global' - } - const params = { - q: query, - responseFilter: 'Webpages', - count: limit.toString() - } + searchUrl.searchParams.set('q', query) + searchUrl.searchParams.set('responseFilter', 'Webpages') + searchUrl.searchParams.set('count', limit.toString()) + searchUrl.searchParams.set('mkt', this.config.bingSearchLocation) - Object.entries(params).forEach(([key, value]) => { - searchUrl.searchParams.append(key, value) + const response = await this._plugin.fetch(searchUrl, { + headers: { + 'Ocp-Apim-Subscription-Key': this.config.bingSearchApiKey, + 'Ocp-Apim-Subscription-Region': this.config.azureLocation + } }) - const response = await this._plugin.fetch(searchUrl, { headers }) - if (!response.ok) { throw new Error(`HTTP error ${response.status}`) } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const res: any = await response.json() - const results = res.webPages.value as { - name: string - snippet: string - url: string - }[] + const res = (await response.json()) as BingAPIResponse + const results = res.webPages?.value 
?? [] if (results.length === 0) { return [ @@ -47,15 +37,13 @@ class BingAPISearchProvider extends SearchProvider { ] } - const snippets = results.map( + return results.map( (item): SearchResult => ({ title: item.name, description: item.snippet, url: item.url }) ) - - return snippets } static schema = Schema.const('bing-api').i18n({ @@ -76,3 +64,13 @@ export function apply( manager.addProvider(new BingAPISearchProvider(ctx, config, plugin)) } } + +interface BingAPIResponse { + webPages?: { + value: { + name: string + snippet: string + url: string + }[] + } +} diff --git a/packages/service-search/src/tools/browser/manager.ts b/packages/service-search/src/tools/browser/manager.ts new file mode 100644 index 000000000..dbf7ce7ec --- /dev/null +++ b/packages/service-search/src/tools/browser/manager.ts @@ -0,0 +1,593 @@ +import { randomUUID } from 'crypto' +import { mkdir, readdir, rm, stat, writeFile } from 'fs/promises' +import { join, resolve } from 'path' +import { Context, Disposable, Time } from 'koishi' +import type { + ConsoleMessage, + Dialog, + ElementHandle, + HTTPRequest, + Page, + PuppeteerLifeCycleEvent, + SerializedAXNode +} from 'puppeteer-core' +import type {} from 'koishi-plugin-puppeteer' +import { getMessageContent } from 'koishi-plugin-chatluna/utils/string' +import { ChatLunaChatModel } from 'koishi-plugin-chatluna/llm-core/platform/model' +import { ChatLunaToolRunnable } from 'koishi-plugin-chatluna/llm-core/platform/types' + +export interface BrowserManagerConfig { + browserTimeout: number + browserIdleTimeout: number + browserMaxPages: number + browserOutputLimit: number +} + +export interface BrowserReadOptions { + url?: string + pageId?: number + selector?: string + includeLinks?: boolean + maxLength?: number + waitUntil?: PuppeteerLifeCycleEvent + timeout?: number +} + +export interface BrowserOpenOptions { + url: string + newPage?: boolean + background?: boolean + waitUntil?: PuppeteerLifeCycleEvent + timeout?: number +} + +export interface 
/**
 * One browser tab owned by a {@link BrowserSession}.
 *
 * Wraps a puppeteer `Page` and keeps a bounded history of console messages
 * and network requests, plus the most recent dialog, so that the debugging
 * tools can read them after the fact.
 */
export class BrowserPage {
    /** Latest accessibility snapshot, assigned by `BrowserManager.snapshot`. */
    snapshot?: BrowserSnapshot
    /** Recent console messages; only the newest 200 entries are kept. */
    console: BrowserConsoleItem[] = []
    /** Recent finished/failed requests; only the newest 500 entries are kept. */
    network: BrowserNetworkItem[] = []
    /** Most recent dialog raised by the page; overwritten on every `dialog` event. */
    dialog?: Dialog

    private _disposers: (() => void)[] = []

    /**
     * @param id - Session-local page id handed out by `BrowserSession.nextPageId`.
     * @param page - The underlying puppeteer page; listeners are attached immediately.
     */
    constructor(
        readonly id: number,
        readonly page: Page
    ) {
        const consoleHandler = (msg: ConsoleMessage) => {
            const loc = msg.location()
            this.console.push({
                type: msg.type(),
                text: msg.text(),
                url: loc.url,
                line: loc.lineNumber,
                time: new Date().toISOString()
            })
            this.console = this.console.slice(-200)
        }
        // `HTTPRequest.response()` is synchronous, so the listener does not
        // need to be async (an async listener would be a floating promise).
        const requestFinished = (req: HTTPRequest) => {
            this.network.push({
                method: req.method(),
                url: req.url(),
                type: req.resourceType(),
                status: req.response()?.status(),
                time: new Date().toISOString()
            })
            this.network = this.network.slice(-500)
        }
        const requestFailed = (req: HTTPRequest) => {
            this.network.push({
                method: req.method(),
                url: req.url(),
                type: req.resourceType(),
                failure: req.failure()?.errorText,
                time: new Date().toISOString()
            })
            this.network = this.network.slice(-500)
        }
        const dialogHandler = (dialog: Dialog) => {
            this.dialog = dialog
        }

        page.on('console', consoleHandler)
        page.on('requestfinished', requestFinished)
        page.on('requestfailed', requestFailed)
        page.on('dialog', dialogHandler)

        this._disposers.push(
            () => page.off('console', consoleHandler),
            () => page.off('requestfinished', requestFinished),
            () => page.off('requestfailed', requestFailed),
            () => page.off('dialog', dialogHandler)
        )
    }

    /**
     * Detaches all listeners and closes the underlying page if it is still
     * open. Safe to call more than once: the disposer list is drained on the
     * first call.
     */
    async close() {
        for (const dispose of this._disposers.splice(0)) dispose()
        if (!this.page.isClosed()) await this.page.close()
    }
}

/** Per-conversation browser state: open pages, the selected page and idle tracking. */
class BrowserSession {
    /** Pages keyed by session-local id; insertion order is oldest first. */
    pages = new Map<number, BrowserPage>()
    /** Id of the page used when a tool call omits `pageId`. */
    selectedPageId?: number
    /** Next id to hand out; ids start at 1 and are never reused. */
    nextPageId = 1
    /** Timestamp (ms) of the last `getSession` call, used for idle cleanup. */
    lastActionTime = Date.now()
}

/**
 * Owns every {@link BrowserSession} and implements the operations behind the
 * browser tools: opening/navigating pages, reading content and snapshots.
 *
 * A cleanup pass runs once a minute and closes sessions that have been idle
 * longer than `config.browserIdleTimeout`; everything is closed when the
 * Koishi context is disposed.
 */
export class BrowserManager {
    private _sessions = new Map<string, BrowserSession>()
    private _timer?: Disposable

    constructor(
        readonly ctx: Context,
        readonly config: BrowserManagerConfig
    ) {
        this._timer = ctx.setInterval(
            () => this.cleanupIdleSessions(),
            Time.minute
        )
        ctx.on('dispose', () => {
            this.closeAll().catch((err) => ctx.logger.error(err))
            this._timer?.()
        })
    }

    /**
     * Returns the session for the calling conversation, creating it on first
     * use, and refreshes its idle timestamp.
     */
    getSession(runConfig?: ChatLunaToolRunnable) {
        const key = getSessionKey(runConfig)
        const session = this._sessions.get(key) ?? new BrowserSession()
        session.lastActionTime = Date.now()
        this._sessions.set(key, session)
        return session
    }

    /**
     * Navigates to `input.url`, reusing the selected page unless
     * `input.newPage` is set or no page is selected.
     *
     * @returns The page that now shows the URL (and is selected).
     * @throws Whatever `goto` throws; a page created by this call is closed
     *   and the previous selection is restored before rethrowing.
     */
    async open(input: BrowserOpenOptions, runConfig?: ChatLunaToolRunnable) {
        const session = this.getSession(runConfig)
        const prev = session.selectedPageId
        const current = prev ? session.pages.get(prev) : undefined
        const page =
            input.newPage || !current
                ? await this.createPage(session, input.background)
                : current
        const created = page !== current

        try {
            await page.page.goto(input.url, {
                waitUntil: input.waitUntil ?? 'domcontentloaded',
                timeout: input.timeout ?? this.config.browserTimeout
            })
            session.selectedPageId = page.id
            await this.trimPages(session)
            return page
        } catch (err) {
            if (created) {
                await page.close().catch(() => undefined)
                session.pages.delete(page.id)
                session.selectedPageId = prev
            }
            throw err
        }
    }

    /**
     * Navigates an existing page by URL, history step or reload.
     *
     * @throws Error when `action` is `'url'` but no `url` was supplied.
     */
    async navigate(
        input: BrowserNavigateOptions,
        runConfig?: ChatLunaToolRunnable
    ) {
        const page = this.getPage(runConfig, input.pageId)
        const opts = {
            waitUntil: input.waitUntil ?? 'domcontentloaded',
            timeout: input.timeout ?? this.config.browserTimeout
        }

        if (input.action === 'url') {
            // Guard here as well as in the tool: this method is public and
            // `goto(undefined)` fails with a far less helpful message.
            if (!input.url) {
                throw new Error('url is required when action is url')
            }
            await page.page.goto(input.url, opts)
        } else if (input.action === 'back') {
            await page.page.goBack(opts)
        } else if (input.action === 'forward') {
            await page.page.goForward(opts)
        } else {
            await page.page.reload(opts)
        }

        return page
    }

    /**
     * Looks up a page by id (or the selected page) and marks it as selected.
     *
     * @throws Error when no page is open, the id is unknown, or the page is closed.
     */
    getPage(runConfig?: ChatLunaToolRunnable, pageId?: number) {
        const session = this.getSession(runConfig)
        const id = pageId ?? session.selectedPageId
        if (id == null) throw new Error('No browser page is open')
        const page = session.pages.get(id)
        if (!page) throw new Error(`Browser page ${id} does not exist`)
        if (page.page.isClosed())
            throw new Error(`Browser page ${id} is closed`)
        session.selectedPageId = id
        return page
    }

    /** Lists the conversation's pages with their id, URL and selection flag. */
    listPages(runConfig?: ChatLunaToolRunnable) {
        const session = this.getSession(runConfig)
        return [...session.pages.values()].map((item) => ({
            id: item.id,
            selected: item.id === session.selectedPageId,
            url: item.page.url()
        }))
    }

    /**
     * Marks `pageId` as the selected page.
     *
     * @throws Error when the id is unknown.
     */
    selectPage(pageId: number, runConfig?: ChatLunaToolRunnable) {
        const session = this.getSession(runConfig)
        const page = session.pages.get(pageId)
        if (!page) throw new Error(`Browser page ${pageId} does not exist`)
        session.selectedPageId = pageId
        return page
    }

    /**
     * Closes and forgets a page. The selection only moves (to the oldest
     * remaining page) when the closed page was the selected one.
     *
     * @throws Error when the id is unknown; rethrows a failing `close()`
     *   after the page has been removed from the session.
     */
    async closePage(pageId: number, runConfig?: ChatLunaToolRunnable) {
        const session = this.getSession(runConfig)
        const page = session.pages.get(pageId)
        if (!page) throw new Error(`Browser page ${pageId} does not exist`)
        try {
            await page.close()
        } finally {
            // Always drop the entry so a failed close cannot leave a zombie
            // page that every later lookup trips over.
            session.pages.delete(pageId)
            if (session.selectedPageId === pageId) {
                session.selectedPageId = session.pages.keys().next().value
            }
        }
    }

    /**
     * Extracts readable text from a page (optionally opening `input.url`
     * first) and applies the output limit via {@link formatOutput}.
     */
    async readText(
        input: BrowserReadOptions,
        runConfig?: ChatLunaToolRunnable
    ) {
        const page = await this.resolvePage(input, runConfig)
        // Pass the function itself: puppeteer serializes the callback's
        // source into the page, where Node-side identifiers do not exist, so
        // a wrapper closure calling `readBrowserText` throws ReferenceError.
        // NOTE(review): bundlers that inject name helpers into function
        // bodies can break serialized functions — confirm the build config.
        const text = await page.page.evaluate(
            readBrowserText,
            input.selector,
            input.includeLinks ?? false
        )
        return await this.formatOutput({
            name: 'browser-read-text',
            text,
            limit: input.maxLength
        })
    }

    /** Returns the outer HTML of `input.selector`, or of the whole document. */
    async getHtml(input: BrowserReadOptions, runConfig?: ChatLunaToolRunnable) {
        const page = await this.resolvePage(input, runConfig)
        const html = await page.page.evaluate(
            (selector) =>
                selector
                    ? (document.querySelector(selector)?.outerHTML ?? '')
                    : document.documentElement.outerHTML,
            input.selector
        )
        return await this.formatOutput({
            name: 'browser-html',
            text: html,
            limit: input.maxLength
        })
    }

    /** Returns the page's links as pretty-printed JSON. */
    async getLinks(
        input: BrowserReadOptions,
        runConfig?: ChatLunaToolRunnable
    ) {
        const page = await this.resolvePage(input, runConfig)
        // See readText: the function must be passed directly to be serialized.
        const links = await page.page.evaluate(readBrowserLinks)
        return await this.formatOutput({
            name: 'browser-links',
            text: JSON.stringify(links, null, 2),
            limit: input.maxLength
        })
    }

    /**
     * Reads the page text and asks `model` (temperature 0) to summarize it,
     * optionally with a focus topic.
     */
    async summarize(
        input: BrowserReadOptions & { focus?: string },
        model: ChatLunaChatModel,
        runConfig?: ChatLunaToolRunnable
    ) {
        const page = await this.resolvePage(input, runConfig)
        // See readText: the function must be passed directly to be serialized.
        const text = await page.page.evaluate(
            readBrowserText,
            input.selector,
            input.includeLinks ?? false
        )
        const summary = await model.invoke(
            createSummaryPrompt(text, input.focus),
            {
                temperature: 0
            }
        )
        const content = getMessageContent(summary.content)
        return await this.formatOutput({
            name: 'browser-summary',
            text: content,
            limit: input.maxLength
        })
    }

    /**
     * Takes an accessibility snapshot, assigns each node a `<pageId>_<n>` uid
     * and stores the result on the page for later {@link getElement} calls.
     *
     * @param verbose - When true, uninteresting nodes are included as well.
     * @throws Error when puppeteer returns no snapshot.
     */
    async snapshot(
        page: BrowserPage,
        verbose = false
    ): Promise<BrowserSnapshot> {
        const raw = await page.page.accessibility.snapshot({
            interestingOnly: !verbose
        })
        if (!raw) throw new Error('Failed to create browser snapshot')
        const nodes = new Map<string, BrowserSnapshotNode>()
        let idx = 0
        const root = assignSnapshotIds(raw, nodes, () => `${page.id}_${idx++}`)
        page.snapshot = { root, nodes }
        return page.snapshot
    }

    /**
     * Resolves a snapshot uid to a live element handle. The caller owns the
     * handle and is expected to dispose it.
     *
     * @throws Error when there is no snapshot, the uid is unknown, or the
     *   element no longer exists.
     */
    async getElement(page: BrowserPage, uid: string) {
        if (!page.snapshot)
            throw new Error('No snapshot found. Use browser_snapshot first.')
        const node = page.snapshot.nodes.get(uid)
        if (!node) throw new Error(`Element uid ${uid} not found`)
        const handle = await node.elementHandle()
        if (!handle) throw new Error(`Element uid ${uid} no longer exists`)
        return handle as ElementHandle
    }

    /**
     * Returns `input.text` unchanged when it fits the limit. Otherwise the
     * full text is written to `data/chatluna/browser-output` under the base
     * directory and a truncated preview plus the file path is returned.
     * Files in that directory older than one day are removed on the way.
     */
    async formatOutput(input: BrowserOutputOptions) {
        const limit = input.limit ?? this.config.browserOutputLimit
        if (input.text.length <= limit) return input.text

        const dir = resolve(this.ctx.baseDir, 'data/chatluna/browser-output')
        const file = join(
            dir,
            `${input.name}-${Date.now()}-${randomUUID()}.txt`
        )
        await mkdir(dir, { recursive: true })
        // Best-effort pruning: unreadable entries are skipped, not fatal.
        for (const item of await readdir(dir).catch(() => [])) {
            const old = join(dir, item)
            const info = await stat(old).catch(() => undefined)
            if (info?.isFile() && Date.now() - info.mtimeMs > Time.day) {
                await rm(old, { force: true })
            }
        }
        await writeFile(file, input.text, 'utf-8')
        return [
            `Output too large (${input.text.length} chars). Truncated preview below.`,
            `Full output saved to: ${file}`,
            '',
            input.text.slice(0, limit)
        ].join('\n')
    }

    /**
     * Closes every page of every session. Sessions are forgotten up front and
     * individual close failures are logged, so one failing page can neither
     * keep the others open nor leave stale sessions behind.
     */
    async closeAll() {
        const pages = [...this._sessions.values()].flatMap((session) => [
            ...session.pages.values()
        ])
        this._sessions.clear()
        const results = await Promise.allSettled(
            pages.map((page) => page.close())
        )
        for (const result of results) {
            if (result.status === 'rejected') {
                this.ctx.logger.error(result.reason)
            }
        }
    }

    /**
     * Picks the page a read-style call operates on: opens `input.url` when
     * given (forwarding `waitUntil`/`timeout`), otherwise the requested or
     * selected page.
     */
    private async resolvePage(
        input: BrowserReadOptions,
        runConfig?: ChatLunaToolRunnable
    ) {
        if (!input.url) return this.getPage(runConfig, input.pageId)
        return await this.open(
            {
                url: input.url,
                waitUntil: input.waitUntil,
                timeout: input.timeout
            },
            runConfig
        )
    }

    /** Creates a tab, registers it in the session and selects it. */
    private async createPage(session: BrowserSession, background?: boolean) {
        const page = await this.ctx.puppeteer.page()
        try {
            if (!background) await page.bringToFront()
        } catch (err) {
            // The tab is not tracked yet, so close it here or it leaks.
            await page.close().catch(() => undefined)
            throw err
        }
        const item = new BrowserPage(session.nextPageId++, page)
        session.pages.set(item.id, item)
        session.selectedPageId = item.id
        return item
    }

    /**
     * Closes the oldest pages until the session is within
     * `config.browserMaxPages`. The selected page is never evicted, so the
     * page returned by `open` stays usable.
     */
    private async trimPages(session: BrowserSession) {
        for (const [id, page] of [...session.pages]) {
            if (!(session.pages.size > this.config.browserMaxPages)) break
            if (id === session.selectedPageId) continue
            try {
                await page.close()
            } catch (err) {
                this.ctx.logger.error(err)
            } finally {
                session.pages.delete(id)
            }
        }
    }

    /** Drops sessions idle for longer than `config.browserIdleTimeout`. */
    private cleanupIdleSessions() {
        for (const [key, session] of this._sessions) {
            if (
                Date.now() - session.lastActionTime <=
                this.config.browserIdleTimeout
            ) {
                continue
            }
            for (const page of session.pages.values()) {
                page.close().catch((err) => this.ctx.logger.error(err))
            }
            this._sessions.delete(key)
        }
    }
}

/**
 * Derives the session key for a tool call: conversation id, else channel id,
 * else user id, else the shared `'default'` session.
 */
function getSessionKey(runConfig?: ChatLunaToolRunnable) {
    const cfg = runConfig?.configurable
    return String(
        cfg?.conversationId ??
            cfg?.session?.channelId ??
            cfg?.session?.userId ??
            'default'
    )
}
/**
 * Copies an accessibility tree, giving every node a uid from `nextId` and
 * registering it in `nodes`.
 *
 * Ids are handed out in pre-order (parent before children). Each copy keeps
 * the prototype and own properties of the raw node.
 */
function assignSnapshotIds(
    raw: SerializedAXNode,
    nodes: Map<string, BrowserSnapshotNode>,
    nextId: () => string
): BrowserSnapshotNode {
    const uid = nextId()
    const node = Object.assign(Object.create(Object.getPrototypeOf(raw)), raw, {
        uid,
        children: (raw.children ?? []).map((child) =>
            assignSnapshotIds(child, nodes, nextId)
        )
    }) as BrowserSnapshotNode
    nodes.set(uid, node)
    return node
}

/**
 * Builds the summarization prompt for a page's text.
 *
 * @param text - Page text to summarize.
 * @param focus - Optional topic; when given, a `Focus:` line is inserted.
 */
function createSummaryPrompt(text: string, focus?: string) {
    return `Text: ${text}

${focus ? `Focus: ${focus}\n` : ''}Summarize the page faithfully in the same language as the source text.
If a focus is provided and the page is unrelated, output exactly: [none].
Include important facts, numbers, names, and source links when present.`
}

/**
 * Extracts readable, lightly Markdown-flavoured text from the current
 * document.
 *
 * The function only touches DOM globals and defines everything it needs
 * inside its own body, so it can be serialized and run in the page; keep it
 * free of references to module-level helpers.
 *
 * @param selector - CSS selector of the root to read. Defaults to the first
 *   `article, main, [role="main"]` element, then `document.body`.
 * @param includeLinks - Append up to 30 absolute links as a `## Links` list.
 * @returns The extracted text, or `''` when the root does not exist.
 */
function readBrowserText(selector?: string, includeLinks = false) {
    const root = selector
        ? document.querySelector(selector)
        : (document.querySelector('article, main, [role="main"]') ??
          document.body)
    if (!root) return ''

    // Work on a clone so stripping boilerplate never mutates the live page.
    const copy = root.cloneNode(true) as Element
    copy.querySelectorAll(
        'script, style, noscript, svg, nav, header, footer'
    ).forEach((el) => el.remove())

    const lines: string[] = []
    const walk = (node: Node) => {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.replace(/\s+/g, ' ').trim()
            if (text) lines.push(text)
            return
        }
        if (node.nodeType !== Node.ELEMENT_NODE) return

        const el = node as Element
        const tag = el.tagName.toLowerCase()
        if (/^h[1-6]$/.test(tag)) {
            // `?? ''` keeps a missing textContent from printing "undefined".
            lines.push(
                '\n' +
                    '#'.repeat(Number(tag[1])) +
                    ' ' +
                    (el.textContent ?? '').trim()
            )
            return
        }
        if (tag === 'p' || tag === 'section' || tag === 'article')
            lines.push('\n')
        if (tag === 'li') lines.push('\n- ')
        if (tag === 'br') lines.push('\n')
        if (tag === 'pre') {
            lines.push('\n```\n' + (el.textContent ?? '').trim() + '\n```\n')
            return
        }
        if (tag === 'tr') lines.push('\n| ')
        if (tag === 'td' || tag === 'th') {
            lines.push((el.textContent ?? '').trim() + ' | ')
            return
        }

        for (const child of Array.from(el.childNodes)) walk(child)
    }

    walk(copy)

    if (includeLinks) {
        const links = Array.from(copy.querySelectorAll('a[href]'))
            .map((a) => {
                const text = a.textContent?.replace(/\s+/g, ' ').trim()
                const href = a.getAttribute('href')
                if (!text || !href) return ''
                try {
                    return `- [${text}](${new URL(href, location.href).href})`
                } catch {
                    // A single malformed href must not abort the whole read.
                    return ''
                }
            })
            .filter(Boolean)
            .slice(0, 30)
        if (links.length > 0) lines.push('\n\n## Links\n' + links.join('\n'))
    }

    return lines
        .join(' ')
        .replace(/[ \t]+\n/g, '\n')
        .replace(/\n[ \t]+/g, '\n')
        .replace(/[ \t]{2,}/g, ' ')
        .replace(/\n{3,}/g, '\n\n')
        .trim()
}

/**
 * Collects the document's links, split into same-host and external ones
 * (at most 100 each).
 *
 * Links without text, links whose `href` cannot be parsed, and pure fragment
 * links pointing at the current document are skipped. Like
 * `readBrowserText`, this function must stay self-contained because it is
 * serialized and executed inside the page.
 */
function readBrowserLinks() {
    const current = new URL(location.href)
    const host = current.hostname
    const result: Record<string, { text: string; url: string }[]> = {
        sameSite: [],
        external: []
    }
    for (const a of Array.from(document.querySelectorAll('a[href]'))) {
        const text = a.textContent?.replace(/\s+/g, ' ').trim()
        if (!text) continue
        let url: URL
        try {
            url = new URL(a.getAttribute('href') ?? '', current.href)
        } catch {
            // Malformed href: skip this anchor instead of failing the call.
            continue
        }
        if (
            url.hash &&
            url.origin === current.origin &&
            url.pathname === current.pathname &&
            url.search === current.search
        ) {
            continue
        }
        const item = { text, url: url.href }
        if (url.hostname === host) result.sameSite.push(item)
        else result.external.push(item)
    }
    result.sameSite = result.sameSite.slice(0, 100)
    result.external = result.external.slice(0, 100)
    return result
}
/**
 * Registration metadata shared by the browser tools.
 *
 * `PAGE_META` is the baseline (enabled everywhere). The input and debug
 * variants only add a tag. `EVALUATE_META` is the debug variant with every
 * availability switch turned off by default.
 */
const PAGE_META = {
    source: 'extension',
    group: 'browser',
    tags: ['browser', 'web'],
    defaultAvailability: {
        enabled: true,
        main: true,
        chatluna: true,
        characterScope: 'all' as const
    }
}

/** Baseline metadata with one extra tag appended to the baseline tags. */
const metaWithTag = (tag: string) => ({
    ...PAGE_META,
    tags: [...PAGE_META.tags, tag]
})

/** Tools that act on page elements (click, fill, type, ...). */
const INPUT_META = metaWithTag('input')

/** Tools that expose console/network diagnostics. */
const DEBUG_META = metaWithTag('debug')

/** Script evaluation: tagged like a debug tool but disabled by default. */
const EVALUATE_META = {
    ...DEBUG_META,
    defaultAvailability: {
        enabled: false,
        main: false,
        chatluna: false,
        characterScope: 'none' as const
    }
}
/**
 * Instantiates every browser tool once and registers it on the plugin.
 *
 * Each registration hands back the same shared tool instance, is always
 * selectable, and gets its metadata from the tool name: `browser_evaluate`
 * uses `EVALUATE_META`, element-interaction tools use `INPUT_META`,
 * console/network tools use `DEBUG_META`, and everything else `PAGE_META`.
 *
 * @param plugin - Plugin that receives the tool registrations.
 * @param manager - Browser manager shared by all tools.
 * @param summaryModel - Model reference handed to the summarize tool.
 */
export function registerBrowserTools(
    plugin: ChatLunaPlugin,
    manager: BrowserManager,
    summaryModel: ComputedRef<ChatLunaChatModel>
) {
    const inputKeywords = ['click', 'fill', 'type', 'key', 'upload', 'hover']
    const debugKeywords = ['console', 'network']

    // Checked in this order: exact evaluate name, then input, then debug.
    const pickMeta = (toolName: string) => {
        if (toolName === 'browser_evaluate') return EVALUATE_META
        if (inputKeywords.some((word) => toolName.includes(word))) {
            return INPUT_META
        }
        if (debugKeywords.some((word) => toolName.includes(word))) {
            return DEBUG_META
        }
        return PAGE_META
    }

    const tools = [
        new BrowserOpenTool(manager),
        new BrowserListPagesTool(manager),
        new BrowserSelectPageTool(manager),
        new BrowserClosePageTool(manager),
        new BrowserNavigateTool(manager),
        new BrowserReadTextTool(manager),
        new BrowserGetHtmlTool(manager),
        new BrowserGetLinksTool(manager),
        new BrowserSummarizeTool(manager, summaryModel),
        new BrowserSnapshotTool(manager),
        new BrowserWaitForTool(manager),
        new BrowserScreenshotTool(manager),
        new BrowserClickTool(manager),
        new BrowserHoverTool(manager),
        new BrowserFillTool(manager),
        new BrowserFillFormTool(manager),
        new BrowserTypeTool(manager),
        new BrowserPressKeyTool(manager),
        new BrowserUploadFileTool(manager),
        new BrowserEvaluateTool(manager),
        new BrowserConsoleTool(manager),
        new BrowserNetworkTool(manager)
    ]

    tools.forEach((tool) => {
        plugin.registerTool(tool.name, {
            description: tool.description,
            createTool: () => tool,
            selector: () => true,
            meta: pickMeta(tool.name)
        })
    })
}
/**
 * Tool wrapper around `BrowserManager.navigate`: goes to a URL, steps
 * through history, or reloads, then reports the page id and resulting URL as
 * pretty-printed JSON.
 */
class BrowserNavigateTool extends StructuredTool {
    name = 'browser_navigate'
    description =
        'Navigate the selected browser page by URL, back, forward, or reload.'

    // The refinement rejects `action: 'url'` without a url at schema level.
    schema = pageIdSchema
        .extend({
            action: z.enum(['url', 'back', 'forward', 'reload']),
            url: z.string().optional().describe('Required when action is url.'),
            waitUntil: z
                .enum([
                    'load',
                    'domcontentloaded',
                    'networkidle0',
                    'networkidle2'
                ])
                .optional(),
            timeout: z.number().optional()
        })
        .refine((input) => input.action !== 'url' || !!input.url, {
            message: 'url is required when action is url',
            path: ['url']
        })

    constructor(private manager: BrowserManager) {
        super()
    }

    async _call(
        input: z.infer<BrowserNavigateTool['schema']>,
        _,
        cfg: ChatLunaToolRunnable
    ) {
        // Repeats the schema refinement as a runtime guard.
        const urlMissing = input.action === 'url' && !input.url
        if (urlMissing) {
            throw new Error('url is required when action is url')
        }

        const request = { ...input, action: input.action }
        const page = await this.manager.navigate(request, cfg)
        const report = { pageId: page.id, url: page.page.url() }
        return JSON.stringify(report, null, 2)
    }
}
/**
 * Tool wrapper around `BrowserManager.summarize`.
 *
 * The summary model comes from the injected `ComputedRef`; when that is
 * empty, the model carried on the tool call's `configurable` is used.
 */
class BrowserSummarizeTool extends StructuredTool {
    name = 'browser_summarize'
    description =
        'Summarize a URL or selected browser page in the source language.'

    schema = readSchema.extend({
        focus: z.string().optional().describe('Optional summary focus.')
    })

    constructor(
        private manager: BrowserManager,
        private model: ComputedRef<ChatLunaChatModel>
    ) {
        super()
    }

    /**
     * @throws Error('No model available for summarization') when neither the
     *   injected reference nor the call configuration provides a model.
     */
    async _call(
        input: z.infer<BrowserSummarizeTool['schema']>,
        _,
        cfg: ChatLunaToolRunnable
    ) {
        // Optional chaining: a call without `configurable` must reach the
        // descriptive error below rather than fail with a TypeError.
        const model = this.model.value ?? cfg?.configurable?.model
        if (!model) throw new Error('No model available for summarization')
        return await this.manager.summarize(input, model, cfg)
    }
}
+ schema = pageIdSchema.extend({ + text: z.string().describe('Text to wait for.'), + timeout: z.number().optional().describe('Timeout in ms.') + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + await page.page.waitForFunction( + (text) => document.body?.innerText?.includes(text), + { timeout: input.timeout ?? this.manager.config.browserTimeout }, + input.text + ) + return `Text found: ${input.text}` + } +} + +class BrowserScreenshotTool extends StructuredTool { + name = 'browser_screenshot' + description = + 'Take a screenshot of the selected browser page or an element uid.' + + schema = pageIdSchema.extend({ + uid: z + .string() + .optional() + .describe('Element uid from browser_snapshot.'), + fullPage: z.boolean().optional().describe('Capture full page.'), + filePath: z.string().optional().describe('Output image path.'), + format: z.enum(['png', 'jpeg', 'webp']).optional() + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const target = input.uid + ? await this.manager.getElement(page, input.uid) + : page.page + const format = input.format ?? 'png' + const rawName = input.filePath + ? input.filePath + .split(/[\\/]/) + .pop() + ?.replace(/\.[^.]+$/, '') + : `screenshot-${Date.now()}` + if (!rawName || rawName === '.' || rawName === '..') { + throw new Error('invalid screenshot file name') + } + const ext = format === 'jpeg' ? 'jpg' : format + const name = `${rawName.replace(/[^A-Za-z0-9._-]/g, '_')}.${ext}` + + try { + const data = await target.screenshot({ + type: format, + fullPage: input.uid ? 
undefined : input.fullPage + }) + + const session = await this.manager.ctx.chatluna_agent?.computer + .getToolSession(cfg) + .catch(() => undefined) + if (session) { + const base = + session.cwd || session.getScopePath() || process.cwd() + const root = /^[A-Za-z]:[\\/]?$/.test(base) + ? `${base[0]}:/` + : base === '/' + ? '/' + : base.replace(/[\\/]+$/, '') + const slash = root.endsWith('/') ? '' : '/' + const file = `${root}${slash}.chatluna-browser-output/${name}` + await session.writeFile(file, data) + return `Screenshot saved to: ${file}` + } + + const dir = resolve( + this.manager.ctx.baseDir, + 'data/chatluna/browser-output' + ) + const file = resolve(dir, name) + if (!file.startsWith(dir + sep)) { + throw new Error('invalid screenshot file name') + } + await mkdir(dir, { recursive: true }) + await writeFile(file, data) + return `Screenshot saved to: ${file}` + } finally { + if (input.uid) { + await (target as ElementHandle).dispose() + } + } + } +} + +class BrowserClickTool extends StructuredTool { + name = 'browser_click' + description = 'Click an element uid from browser_snapshot.' + schema = uidSchema.extend({ double: z.boolean().optional() }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const el = await this.manager.getElement(page, input.uid) + try { + await el.click({ count: input.double ? 2 : 1 }) + } finally { + await el.dispose() + } + return await actionResult(this.manager, page, input.includeSnapshot) + } +} + +class BrowserHoverTool extends StructuredTool { + name = 'browser_hover' + description = 'Hover an element uid from browser_snapshot.' 
+ schema = uidSchema + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const el = await this.manager.getElement(page, input.uid) + try { + await el.hover() + } finally { + await el.dispose() + } + return await actionResult(this.manager, page, input.includeSnapshot) + } +} + +class BrowserFillTool extends StructuredTool { + name = 'browser_fill' + description = 'Fill an input, textarea, select, checkbox, or radio by uid.' + schema = uidSchema.extend({ value: z.string().describe('Value to fill.') }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + await fillElement(this.manager, page, input.uid, input.value) + return await actionResult(this.manager, page, input.includeSnapshot) + } +} + +class BrowserFillFormTool extends StructuredTool { + name = 'browser_fill_form' + description = 'Fill multiple browser form elements at once.' + schema = pageIdSchema.extend({ + elements: z.array( + z.object({ + uid: z.string().describe('Element uid from browser_snapshot.'), + value: z.string().describe('Value to fill.') + }) + ), + includeSnapshot: z.boolean().optional() + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + for (const item of input.elements) { + await fillElement(this.manager, page, item.uid, item.value) + } + return await actionResult(this.manager, page, input.includeSnapshot) + } +} + +class BrowserTypeTool extends StructuredTool { + name = 'browser_type' + description = 'Type text using keyboard into the focused browser element.' 
+ schema = pageIdSchema.extend({ + text: z.string(), + submitKey: z.string().optional() + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + await page.page.keyboard.type(input.text) + if (input.submitKey) { + await page.page.keyboard.press(input.submitKey as KeyInput) + } + return 'Typed text into the focused browser element' + } +} + +class BrowserPressKeyTool extends StructuredTool { + name = 'browser_press_key' + description = 'Press a key or key combination in the selected browser page.' + schema = pageIdSchema.extend({ key: z.string() }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const keys = input.key.split('+') + const key = keys.pop() + try { + for (const item of keys) { + await page.page.keyboard.down(item as KeyInput) + } + await page.page.keyboard.press(key as KeyInput) + } finally { + for (const item of [...keys].reverse()) { + await page.page.keyboard.up(item as KeyInput) + } + } + return `Pressed key: ${input.key}` + } +} + +class BrowserUploadFileTool extends StructuredTool { + name = 'browser_upload_file' + description = 'Upload a file through an element uid from browser_snapshot.' + schema = uidSchema.extend({ filePath: z.string() }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const el = await this.manager.getElement(page, input.uid) + try { + const dir = resolve( + this.manager.ctx.baseDir, + 'data/chatluna/browser-upload' + ) + const name = input.filePath.split(/[\\/]/).pop() + if (!name || name === '.' 
|| name === '..') { + throw new Error('invalid upload file name') + } + const file = resolve(dir, name) + if (!file.startsWith(dir + sep)) { + throw new Error('invalid upload file name') + } + await (el as ElementHandle).uploadFile(file) + } finally { + await el.dispose() + } + return await actionResult(this.manager, page, input.includeSnapshot) + } +} + +class BrowserEvaluateTool extends StructuredTool { + name = 'browser_evaluate' + description = 'Evaluate a JavaScript function in the selected browser page.' + schema = pageIdSchema.extend({ + function: z + .string() + .describe('Function declaration, e.g. () => document.title.'), + args: z + .array(z.string()) + .optional() + .describe('Element uids passed as args.') + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const handles = [] + try { + for (const uid of input.args ?? []) { + handles.push(await this.manager.getElement(page, uid)) + } + const result = await page.page.evaluate( + async (fnText, ...args) => { + // eslint-disable-next-line no-new-func + const fn = new Function(`return (${fnText})`)() + return await fn(...args) + }, + input.function, + ...handles + ) + return await this.manager.formatOutput({ + name: 'browser-evaluate', + text: JSON.stringify(result, null, 2) + }) + } finally { + await Promise.all(handles.map((item) => item.dispose())) + } + } +} + +class BrowserConsoleTool extends StructuredTool { + name = 'browser_console' + description = 'List recent console messages from the selected browser page.' 
+ schema = pageIdSchema.extend({ + levels: z.array(z.string()).optional(), + limit: z.number().optional() + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const rows = page.console + .filter((item) => !input.levels || input.levels.includes(item.type)) + .slice(-(input.limit ?? 50)) + return await this.manager.formatOutput({ + name: 'browser-console', + text: JSON.stringify(rows, null, 2) + }) + } +} + +class BrowserNetworkTool extends StructuredTool { + name = 'browser_network' + description = 'List recent network requests from the selected browser page.' + schema = pageIdSchema.extend({ + types: z.array(z.string()).optional(), + limit: z.number().optional() + }) + + constructor(private manager: BrowserManager) { + super() + } + + async _call( + input: z.infer, + _, + cfg: ChatLunaToolRunnable + ) { + const page = this.manager.getPage(cfg, input.pageId) + const rows = page.network + .filter((item) => !input.types || input.types.includes(item.type)) + .slice(-(input.limit ?? 
80)) + return await this.manager.formatOutput({ + name: 'browser-network', + text: JSON.stringify(rows, null, 2) + }) + } +} + +function formatSnapshot(node: BrowserSnapshotNode, depth = 0): string { + const attrs = [`uid=${node.uid}`, node.role] + if (node.name) attrs.push(`"${node.name}"`) + const line = `${' '.repeat(depth * 2)}${attrs.filter(Boolean).join(' ')}` + return [ + line, + ...node.children.map((child) => formatSnapshot(child, depth + 1)) + ].join('\n') +} + +async function actionResult( + manager: BrowserManager, + page: ReturnType, + includeSnapshot?: boolean +) { + if (!includeSnapshot) return 'Browser action completed' + const snapshot = await manager.snapshot(page) + return formatSnapshot(snapshot.root) +} + +async function fillElement( + manager: BrowserManager, + page: ReturnType, + uid: string, + value: string +) { + const el = await manager.getElement(page, uid) + try { + await el.evaluate((node, value) => { + if (node instanceof HTMLInputElement) { + if (node.type === 'checkbox' || node.type === 'radio') { + node.checked = value === 'true' + } else { + node.value = value + } + node.dispatchEvent(new Event('input', { bubbles: true })) + node.dispatchEvent(new Event('change', { bubbles: true })) + return + } + if ( + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + node.value = value + node.dispatchEvent(new Event('input', { bubbles: true })) + node.dispatchEvent(new Event('change', { bubbles: true })) + return + } + ;(node as HTMLElement).innerText = value + node.dispatchEvent(new Event('input', { bubbles: true })) + }, value) + } finally { + await el.dispose() + } +} diff --git a/packages/service-search/src/tools/puppeteerBrowserTool.ts b/packages/service-search/src/tools/puppeteerBrowserTool.ts deleted file mode 100644 index 26b9c135b..000000000 --- a/packages/service-search/src/tools/puppeteerBrowserTool.ts +++ /dev/null @@ -1,927 +0,0 @@ -/* eslint-disable max-len */ -import { StructuredTool } from 
'@langchain/core/tools' -import { Context, Disposable } from 'koishi' -import type { Page, PuppeteerLifeCycleEvent } from 'puppeteer-core' -import type {} from 'koishi-plugin-puppeteer' -import { Embeddings } from '@langchain/core/embeddings' -import { z } from 'zod' -import { LRUCache } from 'lru-cache' -import { ChatLunaChatModel } from 'koishi-plugin-chatluna/llm-core/platform/model' -import { getMessageContent } from 'koishi-plugin-chatluna/utils/string' -import { ChatLunaToolRunnable } from 'koishi-plugin-chatluna/llm-core/platform/types' -import { ComputedRef } from 'koishi-plugin-chatluna' - -export const PUPPETEER_BROWSER_TOOL_DESCRIPTION = `A powerful tool designed for seamless web browsing. - Available actions: - - open [url]: Open a web page (required first action) - - summarize [search_text?]: Simple summarize the current page, optionally with a search text. - - text [search_text?]: Get the content of the current page, optionally with a search text - - select [selector]: Select content from a specific div - - previous: Go to the previous page - - get-html: Get the HTML content of the current page - - get-structured-urls: Get structured URLs from the current page - Every action must be input with the URL of the page. 
Like this: {{ - action: 'summarize', - params: 'xxx', - url: 'https://example.com' - }} - After using this tool, you must process the result before considering using it again in the next turn.` - -export interface PuppeteerBrowserToolOptions { - timeout?: number - idleTimeout?: number - waitUntil?: PuppeteerLifeCycleEvent - fastMode?: boolean -} - -export class PuppeteerBrowserTool extends StructuredTool { - name = 'web_browser' - description = PUPPETEER_BROWSER_TOOL_DESCRIPTION - - private pages: LRUCache - private lastActionTime: number = Date.now() - private readonly timeout: number = 30000 // 30 seconds timeout - private readonly idleTimeout: number = 180000 // 5 minutes idle timeout - private model: ComputedRef - - private ctx: Context - private waitUntil: PuppeteerLifeCycleEvent - private disposables: Disposable[] = [] - schema = z.object({ - action: z.string().describe('The action to perform'), - params: z.string().optional().describe('The parameters for the action'), - url: z.string().optional().describe('The URL to action on') - // eslint-disable-next-line @typescript-eslint/no-explicit-any - }) as any - - private actions: Record< - string, - (url: string, params?: string) => Promise - > = { - open: this.openPage.bind(this), - summarize: this.summarizePage.bind(this), - text: this.getPageText.bind(this), - select: this.selectDiv.bind(this), - previous: this.goToPreviousPage.bind(this), - 'get-html': this.getHtml.bind(this), - 'get-structured-urls': this.getStructuredUrls.bind(this) - } - - constructor( - ctx: Context, - model: ComputedRef, - embeddings: Embeddings, - options: PuppeteerBrowserToolOptions = {} - ) { - super() - - this.ctx = ctx - this.model = model - - this.timeout = options.timeout || this.timeout - this.idleTimeout = options.idleTimeout || this.idleTimeout - - this.pages = new LRUCache({ - max: 20, - dispose: (value, key, reason) => { - value.close().catch((err) => { - this.ctx.logger.error( - `Error closing page ${key}: ${err.message}` - 
) - }) - } - }) - - this.waitUntil = options.waitUntil || this.waitUntil - } - - async _call( - input: { - url: string - action: string - params: string - }, - _, - config: ChatLunaToolRunnable - ): Promise { - this.startIdleTimer() - try { - const { action, params, url } = input - - this.lastActionTime = Date.now() - - if (this.actions[action]) { - if (action === 'summarize') { - return await this.summarizePage( - url, - this.model?.value ?? config.configurable.model, - params - ) - } - return await this.actions[action](url, params) - } else { - return `Unknown action: ${action}. Available actions: ${Object.keys(this.actions).join(', ')}` - } - } catch (error) { - if (error instanceof Error) { - return `Error: ${error.message}` - } - return 'An unknown error occurred' - } - } - - private async getPage(url: string) { - if (!this.pages.has(url)) { - const puppeteer = this.ctx.puppeteer - if (!puppeteer) { - throw new Error('Puppeteer service is not available') - } - const page = await puppeteer.page() - await page.goto(url, { - waitUntil: this.waitUntil, - timeout: this.timeout - }) - this.pages.set(url, page) - } else { - this.pages.get(url) - } - - return this.pages.get(url) - } - - private async openPage(url: string, params?: string): Promise { - try { - await this.getPage(url ?? 
params) - return 'Page opened successfully' - } catch (error) { - console.error(error) - return `Error opening page: ${error.message}` - } - } - - private async summarizePage( - url: string, - model: ChatLunaChatModel, - searchText?: string - ): Promise { - try { - const text = await this.getPageText(url) - if (text.includes('Error getting page text')) { - return text - } - return this.summarizeText(text, model, searchText) - } catch (error) { - console.error(error) - return `Error summarizing page: ${error.message}` - } - } - - private async getPageText( - url: string, - searchText?: string - ): Promise { - try { - const page = await this.getPage(url) - if (!page) return 'No page is open, please use open action first' - - const text = await page.evaluate(() => { - // fix esbuild - // eslint-disable-next-line @typescript-eslint/no-explicit-any - window['__name'] = (func: any) => func - - const findMainContent = () => { - const candidates: { - element: Element - score: number - }[] = [] - - // Helper to calculate text density - const getTextDensity = (element: Element) => { - const text = element.textContent || '' - const html = element.innerHTML - return text.length / (html.length || 1) - } - - // Helper to check if element is likely navigation/header/footer - const isBoilerplate = (element: Element) => { - const className = element.className.toLowerCase() - const id = element.id.toLowerCase() - return /nav|header|footer|sidebar|comment|menu|copyright|related|recommend|advertisement|ad-|social|share/i.test( - `${className} ${id}` - ) - } - - // Helper to calculate hierarchical p-tag score - const getParagraphScore = (node: Element): number => { - let value = 0 - - for (const child of Array.from(node.children)) { - if (child.tagName.toLowerCase() === 'p') { - const text = child.textContent || '' - value += text.trim().length - } else { - value += getParagraphScore(child) * 0.5 - } - } - - return value - } - - // Helper to calculate table content score - const 
getTableScore = (node: Element): number => { - let score = 0 - - // 计算表格内容的丰富度 - const rows = node.querySelectorAll('tr').length - const cells = node.querySelectorAll('td, th').length - - if (rows > 0 && cells > 0) { - // 基础分数:行数 * 单元格平均数 - score += (cells / rows) * rows * 2 - - // 表头加分 - const headers = node.querySelectorAll('th').length - if (headers > 0) score += headers * 5 - - // 表格标题加分 - const caption = node.querySelector('caption') - if (caption) score += 10 - - // 内容丰富度加分 - const textLength = node.textContent?.length || 0 - if (textLength > 0) { - score += Math.min(textLength / 100, 50) // 最多加50分 - } - } - - return score - } - - // Common content patterns - const contentPatterns = [ - /article|post|content|main|body|text/i, - /^(article|main|content)$/i - ] - - // Specific class/id scoring patterns - const specificPatterns = { - content: - /content|article-content|post-content|entry-content|main-content/i, - table: /table-content|data-table|grid|list/i, - article: /article|post|entry|blog/i, - main: /main|primary|central/i - } - - // Score each potential content container - document - .querySelectorAll('div, article, main, section, table') - .forEach((element) => { - if (isBoilerplate(element)) return - - let score = 0 - const identifiers = - `${element.className} ${element.id}`.toLowerCase() - - // 标签评分 - const tagName = element.tagName.toLowerCase() - if (tagName === 'article') score += 30 - if (tagName === 'main') score += 25 - if (tagName === 'table') score += 15 - - // 特定类名/ID评分 - Object.entries(specificPatterns).forEach( - ([key, pattern]) => { - if (pattern.test(identifiers)) { - switch (key) { - case 'content': - score += 40 - break - case 'table': - score += 25 - break - case 'article': - score += 30 - break - case 'main': - score += 20 - break - } - } - } - ) - - // 通用内容模式评分 - contentPatterns.forEach((pattern) => { - if (pattern.test(identifiers)) score += 20 - }) - - // 内容密度评分 - const density = getTextDensity(element) - score += density * 50 - - 
// 段落评分 - const paragraphs = - element.getElementsByTagName('p').length - score += paragraphs * 3 - - // 标题评分 - const headings = - element.querySelectorAll( - 'h1,h2,h3,h4,h5,h6' - ).length - score += headings * 5 - - // 表格内容评分 - if ( - tagName === 'table' || - element.querySelector('table') - ) { - score += getTableScore(element) - } - - // 层级段落评分 - const paragraphScore = getParagraphScore(element) - score += paragraphScore * 2 - - // 长度惩罚 - const text = element.textContent || '' - if (text.length < 250) score *= 0.7 - - // 位置评分 - const rect = element.getBoundingClientRect() - const verticalCenter = Math.abs( - 0.5 - - rect.top / - document.documentElement.scrollHeight - ) - score *= 1 - verticalCenter * 0.3 - - // 表格特定优化 - if ( - tagName === 'table' || - element.querySelector('table') - ) { - // 如果是数据展示类的表格,降低文本长度惩罚 - if ( - text.length < 250 && - element.querySelectorAll('td').length > 20 - ) { - score *= 1.5 // 补偿一些分数 - } - } - - candidates.push({ element, score }) - }) - - // Return highest scoring element - candidates.sort((a, b) => b.score - a.score) - return candidates[0]?.element || document.body - } - - const mainContent = findMainContent() - let structuredText = '' - - const processNode = (node: Node, depth: number = 0) => { - if (node.nodeType === Node.TEXT_NODE) { - const trimmedText = node.textContent?.trim() - if (trimmedText) { - structuredText += ' ' + trimmedText - } - } else if (node.nodeType === Node.ELEMENT_NODE) { - const element = node as Element - const tagName = element.tagName.toLowerCase() - - switch (tagName) { - case 'p': - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - structuredText += '\n'.repeat(depth > 0 ? 
1 : 2) - structuredText += `${'#'.repeat(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].indexOf(tagName) + 1)} ` - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'ul': - case 'ol': - structuredText += '\n' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'li': - if (element.textContent?.trim()) { - structuredText += - '\n' + ' '.repeat(depth) + '- ' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - } - break - case 'br': - structuredText += '\n' - break - case 'strong': - case 'b': - structuredText += ` **${element.textContent?.trim()}** ` - break - case 'em': - case 'i': - structuredText += ` *${element.textContent?.trim()}* ` - break - case 'code': - structuredText += ` \`${element.textContent?.trim()}\` ` - break - case 'pre': - structuredText += - '\n```\n' + - element.textContent?.trim() + - '\n```\n' - break - case 'blockquote': - structuredText += - '\n> ' + - element.textContent - ?.trim() - .replace(/\n/g, '\n> ') + - '\n' - break - case 'table': - structuredText += '\n' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'tr': - structuredText += '|' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'th': - case 'td': - structuredText += ` ${element.textContent?.trim()} |` - break - case 'mark': - case 'u': - structuredText += ` __${element.textContent?.trim()}__ ` - break - case 'del': - case 's': - structuredText += ` ~~${element.textContent?.trim()}~~ ` - break - case 'sup': - structuredText += `^${element.textContent?.trim()}` - break - case 'sub': - structuredText += `~${element.textContent?.trim()}` - break - case 'kbd': - structuredText += ` ${element.textContent?.trim()} ` - break - case 'cite': - case 'dfn': - structuredText += ` 
*${element.textContent?.trim()}* ` - break - case 'span': { - const className = element.className - - if (className.includes('highlight')) { - structuredText += ` **${element.textContent?.trim()}** ` - } else if (className.includes('italic')) { - structuredText += ` *${element.textContent?.trim()}* ` - } else { - structuredText += ` ${element.textContent?.trim()} ` - } - break - } - case 'abbr': { - const title = element.getAttribute('title') - structuredText += title - ? ` ${element.textContent?.trim()} (${title})` - : ` ${element.textContent?.trim()}` - break - } - case 'q': - structuredText += ` "${element.textContent?.trim()}" ` - break - case 'time': { - const datetime = - element.getAttribute('datetime') - structuredText += datetime - ? ` ${element.textContent?.trim()} [${datetime}]` - : ` ${element.textContent?.trim()}` - break - } - case 'details': - structuredText += '\n
\n' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n
\n' - break - case 'summary': - structuredText += '' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'figure': - structuredText += '\n' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'figcaption': - structuredText += `\n_${element.textContent?.trim()}_\n` - break - case 'hr': - structuredText += '\n---\n' - break - case 'dl': - structuredText += '\n' - for (const child of element.childNodes) { - processNode(child, depth + 1) - } - structuredText += '\n' - break - case 'dt': - structuredText += `\n**${element.textContent?.trim()}**` - break - case 'dd': - structuredText += `: ${element.textContent?.trim()}\n` - break - case 'var': - structuredText += ` _${element.textContent?.trim()}_ ` - break - case 'samp': - structuredText += ` \`${element.textContent?.trim()}\` ` - break - default: - if ( - tagName !== 'script' && - tagName !== 'style' && - tagName !== 'meta' - ) { - for (const child of element.childNodes) { - processNode(child, depth) - } - } - } - } - } - - // 添加相关链接部分 - const getRelatedLinks = (content: Element) => { - const currentUrl = window.location.href - const currentHost = window.location.hostname - const currentPath = window.location.pathname - - interface LinkGroup { - samePath: string[] - sameHost: string[] - external: string[] - } - - const links: LinkGroup = { - samePath: [], - sameHost: [], - external: [] - } - - // 获取链接的上下文(前后50个字符) - const getLinkContext = (link: Element): string => { - const parent = link.parentElement - if (!parent) return '' - - const text = parent.textContent || '' - const linkText = link.textContent || '' - const linkPos = text.indexOf(linkText) - - if (linkPos === -1) return '' - - const start = Math.max(0, linkPos - 50) - const end = Math.min( - text.length, - linkPos + linkText.length + 50 - ) - - return text.slice(start, end).trim() - } - - 
content.querySelectorAll('a[href]').forEach((link) => { - const href = link.getAttribute('href') - if (!href) return - - try { - const url = new URL(href, currentUrl) - const linkText = link.textContent?.trim() - const context = getLinkContext(link) - - // 忽略空链接、锚点链接和常见功能性链接 - if ( - !linkText || - url.href === currentUrl || - href.startsWith('#') || - /login|signup|register|cart|search|account/i.test( - url.pathname - ) - ) { - return - } - - const linkMd = `- [${linkText}](${url.href})${context ? `\n > ${context}` : ''}` - - if (url.hostname === currentHost) { - if ( - url.pathname.startsWith(currentPath) || - currentPath.startsWith(url.pathname) - ) { - if (!links.samePath.includes(linkMd)) { - links.samePath.push(linkMd) - } - } else { - if (!links.sameHost.includes(linkMd)) { - links.sameHost.push(linkMd) - } - } - } else { - if (!links.external.includes(linkMd)) { - links.external.push(linkMd) - } - } - } catch (e) { - // 忽略无效链接 - } - }) - - let relatedLinksText = '' - - // 只有当有链接时才添加标题 - if ( - links.samePath.length > 0 || - links.sameHost.length > 0 || - links.external.length > 0 - ) { - relatedLinksText = '\n\n## Related Links\n\n' - - if (links.samePath.length > 0) { - relatedLinksText += - '### Same Section\n' + - links.samePath.slice(0, 2).join('\n') + - '\n\n' - } - - if (links.sameHost.length > 0) { - relatedLinksText += - '### Same Site\n' + - links.sameHost.slice(0, 2).join('\n') + - '\n\n' - } - - if (links.external.length > 0) { - relatedLinksText += - '### External References\n' + - links.external.slice(0, 2).join('\n') + - '\n\n' - } - } - - return relatedLinksText - } - - // 处理主要内容 - processNode(mainContent) - - const findBestLinkContainer = ( - mainContent: Element - ): Element => { - const MAX_PARENT_DEPTH = 6 // 限制最大父级查找深度 - let current: Element | null = mainContent - - for (let depth = 0; depth < MAX_PARENT_DEPTH; depth++) { - if (!current) break - - current = current.parentElement - } - - return mainContent // 如果没找到,返回原始内容 - } - - // 使用方式 
- structuredText += getRelatedLinks( - findBestLinkContainer(mainContent) - ) - - return structuredText.trim().replace(/\n{3,}/g, '\n\n') - }) - - // 去除空行,并去除首尾空行,还去除多空格为单空格 - return text - .trim() - .replace(/\n{3,}/g, '\n\n') - .trim() - .replace(/\s+/g, ' ') - } catch (error) { - console.error(error) - return `Error getting page text: ${error.message}` - } - } - - private async summarizeText( - text: string, - model: ChatLunaChatModel, - searchText?: string - ): Promise { - try { - const input = `Text: ${text} - -${ - searchText - ? `Search Focus: "${searchText}" - -First, evaluate if the text content is relevant to the search focus: -1. Identify key concepts in both the search focus and text -2. Check for direct mentions or related terminology -3. Assess contextual relevance -4. Consider semantic relationships - -If the content is NOT relevant to the search focus, output exactly: [none] - -Only if the content IS relevant, provide a comprehensive summary following these guidelines:` - : 'Please provide a comprehensive summary following these guidelines:' -} - -1. Main Points (1-2 paragraphs): - - Key topics and themes - - Central arguments or findings - - Essential context - -2. Supporting Details (2-3 paragraphs): - - Evidence and examples - - Data or statistics - - Expert opinions or quotes - ${searchText ? '- Specific information related to search focus' : ''} - -3. Additional Context (1 paragraph): - - Limitations or caveats - - Alternative viewpoints - - Related considerations - -Guidelines: -- Use clear, concise language -- Maintain objectivity -- Include relevant quotes or statistics -- Reference up to 5 important links -- Stay faithful to source material -- CRITICAL: Use the exact same language as the input text - -IMPORTANT: Your summary MUST be in the same language as the original text. Do not translate or change the language under any circumstances. 
- -Your summary or [none]:` - - const summary = await model.invoke(input, { - temperature: 0 - }) - return getMessageContent(summary.content) - } catch (error) { - console.error(error) - return `Error summarizing text: ${error.message}` - } - } - - private async selectDiv(url: string, selector: string): Promise { - try { - const page = await this.getPage(url) - if (!page) return 'No page is open' - const content = await page.evaluate((sel) => { - const element = document.querySelector(sel) - return element ? element.textContent : 'Element not found' - }, selector) - return content || 'No content found' - } catch (error) { - console.error(`Error selecting div: ${error}`) - return `Error selecting div: ${error.message}` - } - } - - private async goToPreviousPage(url: string): Promise { - try { - const page = await this.getPage(url) - if (!page) return 'No page is open' - await page.goBack({ - waitUntil: 'networkidle2', - timeout: this.timeout - }) - return 'Navigated to previous page' - } catch (error) { - console.error(`Error navigating to previous page: ${error.message}`) - return `Error navigating to previous page: ${error}` - } - } - - private async getHtml(url: string): Promise { - try { - const page = await this.getPage(url) - if (!page) return 'No page is open' - return await page.content() - } catch (error) { - console.error(error) - return `Error getting HTML: ${error.message}` - } - } - - private async getStructuredUrls(url: string): Promise { - try { - const page = await this.getPage(url) - if (!page) return 'No page is open' - return await page.evaluate(() => { - const urlStructure: { [key: string]: string[] } = { - search: [], - navigation: [], - external: [], - other: [] - } - - const currentHost = window.location.hostname - - document.querySelectorAll('a').forEach((a) => { - const href = a.href - if (!href) return - - const url = new URL(href) - const linkText = a.textContent?.trim() || '' - - if (url.hostname === currentHost) { - if ( - 
url.pathname.includes('search') || - url.search.includes('q=') - ) { - urlStructure.search.push(`${linkText}: ${href}`) - } else if ( - a.closest('nav') || - a.matches('header a, footer a') - ) { - urlStructure.navigation.push(`${linkText}: ${href}`) - } else { - urlStructure.other.push(`${linkText}: ${href}`) - } - } else { - urlStructure.external.push(`${linkText}: ${href}`) - } - }) - - return JSON.stringify(urlStructure, null, 2) - }) - } catch (error) { - console.error(error) - return `Error getting structured URLs: ${error.message}` - } - } - - private startIdleTimer() { - if (this.disposables.length > 0) { - return - } - this.disposables.push( - this.ctx.setInterval(() => { - if (Date.now() - this.lastActionTime > this.idleTimeout) { - this.closeBrowser() - } - }, 60000) - ) // Check every minute - this.ctx.on('dispose', async () => { - this.closeBrowser() - }) - } - - async closeBrowser() { - try { - if (this.pages) { - this.pages.forEach((page) => { - page.close().catch((err) => { - this.ctx.logger.error( - `Error closing page: ${err.message}` - ) - }) - }) - this.pages.clear() - } - for (const disposable of this.disposables) { - disposable() - } - this.disposables = [] - } catch (error) { - this.ctx.logger.error(error) - } - } -} diff --git a/packages/service-search/src/tools/search.ts b/packages/service-search/src/tools/search.ts index e687acecc..76c05b502 100644 --- a/packages/service-search/src/tools/search.ts +++ b/packages/service-search/src/tools/search.ts @@ -1,7 +1,7 @@ /* eslint-disable max-len */ import { Tool } from '@langchain/core/tools' import { SearchManager } from '../provide' -import { PuppeteerBrowserTool } from './puppeteerBrowserTool' +import { BrowserManager } from './browser/manager' import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters' import { MemoryVectorStore } from 'koishi-plugin-chatluna/llm-core/vectorstores' import { Embeddings } from '@langchain/core/embeddings' @@ -34,7 +34,7 @@ export class SearchTool 
extends Tool { constructor( private searchManager: SearchManager, - private browserTool: PuppeteerBrowserTool, + private browser: BrowserManager, private embeddings: Embeddings, llm: ComputedRef, private summaryType: SummaryType @@ -44,139 +44,126 @@ export class SearchTool extends Tool { this.llm = llm } - async _call(arg: string, _, config: ChatLunaToolRunnable): Promise { - const documents = await this.fetchSearchResult(arg) + async _call( + query: string, + _, + config: ChatLunaToolRunnable + ): Promise { + const llm = this.llm?.value ?? config.configurable.model + + const docs = await this.fetchSearchResult(query, llm, config) if (this.summaryType !== SummaryType.Balanced) { return JSON.stringify( - documents.map((document) => - Object.assign({}, document.metadata as SearchResult) - ) + docs.map((doc) => Object.assign({}, doc.metadata)) ) } - const fakeSearchResult = await generateFakeSearchResult( - arg, - this.llm?.value ?? config.configurable.model - ) + const result = await generateFakeSearchResult(query, llm) return JSON.stringify( - await this._reRankDocuments( - getMessageContent(fakeSearchResult.content), - documents - ) + await this._reRankDocuments(getMessageContent(result.content), docs) ) } - private async fetchSearchResult(query: string) { + private async fetchSearchResult( + query: string, + llm: ChatLunaChatModel, + runConfig: ChatLunaToolRunnable + ) { const results = await this.searchManager.search(query) - if (this.summaryType === SummaryType.Quality) { - return await Promise.all( - results.map(async (result, k) => { - let pageContent = result.description - - if (pageContent == null || pageContent.length < 500) { - const browserContent: string = - await this.browserTool.invoke({ - url: result.url, - action: 'summarize', - params: query - }) - - if ( - !browserContent.includes( - 'Error getting page text:' - ) && - !browserContent.includes( - 'Error summarizing page:' - ) && - browserContent !== '[none]' - ) { - pageContent = browserContent - 
} - } - - if (pageContent == null) { - return - } - - const chunks = await this._textSplitter - .splitText(pageContent) - .then((chunks) => { - return chunks.map( - (chunk) => - ({ - pageContent: chunk, - metadata: Object.assign( - { description: chunks }, - removeProperty(result, [ - 'description' - ]) - ) - }) as Document - ) - }) - - return chunks - }) - ).then((documents) => documents.flat()) - } else if (this.summaryType === SummaryType.Balanced) { - return await Promise.all( - results.map(async (result, k) => { - let pageContent = result.description - - if (pageContent == null || pageContent.length < 500) { - const browserContent: string = - await this.browserTool.invoke({ - url: result.url, - action: 'text' - }) - - if ( - !browserContent.includes( - 'Error getting page text:' - ) && - !browserContent.includes( - 'Error summarizing page:' - ) && - browserContent !== '[none]' - ) { - pageContent = browserContent - } - } - - if (pageContent == null) { - return - } - - const chunks = await this._textSplitter - .splitText(pageContent) - .then((chunks) => { - return chunks.map( - (chunk) => - ({ - pageContent: chunk, - metadata: result - }) as Document - ) - }) - - return chunks - }) - ).then((documents) => documents.flat()) + if (this.summaryType === SummaryType.Speed) { + return results.map((result) => ({ + pageContent: result.description, + metadata: result + })) as Document[] } - return results.map( - (result) => - ({ - pageContent: result.description, - metadata: result - }) as Document - ) + const docs: Document[] = [] + for (const result of results) { + try { + docs.push( + ...(await this.createDocuments( + result, + query, + llm, + runConfig + )) + ) + } catch (err) { + logger.error(err) + } + } + + return docs + } + + private async createDocuments( + result: SearchResult, + query: string, + llm: ChatLunaChatModel, + runConfig: ChatLunaToolRunnable + ) { + const content = await this.readResult(result, query, llm, runConfig) + + if (content == null) return 
[] + + const chunks = await this._textSplitter.splitText(content) + + return chunks.map((chunk) => { + const metadata = + this.summaryType === SummaryType.Quality + ? Object.assign( + { description: chunk }, + removeProperty(result, ['description']) + ) + : Object.assign({}, result, { description: chunk }) + + return { + pageContent: chunk, + metadata + } as Document + }) + } + + private async readResult( + result: SearchResult, + query: string, + llm: ChatLunaChatModel, + runConfig: ChatLunaToolRunnable + ) { + if (result.url.length < 1) return result.description + + if (result.description && result.description.length >= 500) { + return result.description + } + + const text = await (async () => { + try { + return this.summaryType === SummaryType.Quality + ? await this.browser.summarize( + { url: result.url, focus: query }, + llm, + runConfig + ) + : await this.browser.readText( + { url: result.url }, + runConfig + ) + } catch { + return result.description + } + })() + + if (isBrowserError(text)) return result.description + + return text } private async _reRankDocuments(query: string, documents: Document[]) { + if (documents.length < 1) return [] + if (this.embeddings === emptyEmbeddings) { logger.warn('Embeddings is empty, try check your config') return documents @@ -232,3 +219,11 @@ Generate a brief, factual answer that: Answer the question as if you are a search result snippet.`, inputVariables: ['query'] }) + +function isBrowserError(text: string) { + return ( + text.includes('Error getting page text:') || + text.includes('Error summarizing page:') || + text === '[none]' + ) +} diff --git a/packages/service-search/src/utils/parse.ts b/packages/service-search/src/utils/parse.ts index dff6dc96d..e9c762115 100644 --- a/packages/service-search/src/utils/parse.ts +++ b/packages/service-search/src/utils/parse.ts @@ -1,3 +1,5 @@ +import { SearchAction } from '../types' + /** * 预处理内容,移除可能的 markdown 代码块标记 */ @@ -17,10 +19,9 @@ export function preprocessContent(content: 
string): string { /** * 尝试解析 JSON,失败时返回 null */ -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export function tryParseJSON(content: string): any { +export function tryParseJSON(content: string): T | null { try { - return JSON.parse(content) + return JSON.parse(content) as T } catch (e) { return null } @@ -86,3 +87,27 @@ export function removeProperty( return result as Omit } + +export function parseSearchAction(content: string): SearchAction { + const action = preprocessContent(content) + const parsed = tryParseJSON(action) + + if (parsed) return parsed + + const fixed = tryParseJSON(attemptToFixJSON(action)) + + if (fixed) return fixed + + if (action.includes('[skip]')) { + return { + action: 'skip', + thought: 'skip the search' + } + } + + return { + action: 'search', + thought: action, + content: [action] + } +} diff --git a/packages/service-vector-store/package.json b/packages/service-vector-store/package.json index ebac12d20..3d4988ed3 100644 --- a/packages/service-vector-store/package.json +++ b/packages/service-vector-store/package.json @@ -58,7 +58,7 @@ "@zilliz/milvus2-sdk-node": "^2.6.2", "faiss-node": "^0.5.1", "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" }, "peerDependenciesMeta": { "@zilliz/milvus2-sdk-node": { diff --git a/packages/shared-adapter/package.json b/packages/shared-adapter/package.json index 2468bce82..d5936e71f 100644 --- a/packages/shared-adapter/package.json +++ b/packages/shared-adapter/package.json @@ -70,6 +70,6 @@ }, "peerDependencies": { "koishi": "^4.18.9", - "koishi-plugin-chatluna": "^1.4.0-alpha.15" + "koishi-plugin-chatluna": "^1.4.0-alpha.16" } }