diff --git a/archives/code/dead-code-batch-3/README.md b/archives/code/dead-code-batch-3/README.md
new file mode 100644
index 000000000..bea975789
--- /dev/null
+++ b/archives/code/dead-code-batch-3/README.md
@@ -0,0 +1,14 @@
+# Dead Code Batch 3
+
+- Purpose: archive retired MCP runtime code that is no longer part of the active in-memory server set.
+- Archived at: 2026-03-26
+- Rationale: `meetingServer.ts` has been removed from live MCP registration and default config, but is retained in source form for precise rollback if the feature is rebuilt later.
+
+## Archived Paths
+
+- `src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts`
+
+## Notes
+
+- This directory is not part of the runtime, build, typecheck, or test target set.
+- Restore by moving files back to their original paths only if a future audit proves the retired MCP server is needed again.
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts b/archives/code/dead-code-batch-3/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
similarity index 100%
rename from src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
rename to archives/code/dead-code-batch-3/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
diff --git a/scripts/generate-i18n-types.js b/scripts/generate-i18n-types.js
index 9e24b7c3a..5c212e7c7 100644
--- a/scripts/generate-i18n-types.js
+++ b/scripts/generate-i18n-types.js
@@ -1,6 +1,6 @@
 import fs from 'fs'
 import path from 'path'
-import { fileURLToPath } from 'url'
+import { fileURLToPath, pathToFileURL } from 'url'
 
 const __filename = fileURLToPath(import.meta.url)
 const __dirname = path.dirname(__filename)
@@ -57,6 +57,6 @@ async function main() {
 }
 
 // Only needs to run during local development
-if (import.meta.url === `file://${process.argv[1]}`) {
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
   main()
 }
diff --git a/src/main/events.ts b/src/main/events.ts
index 38c6ac140..77da1c471 100644
--- a/src/main/events.ts
+++ b/src/main/events.ts
@@ -219,11 +219,6 @@ export const TRAY_EVENTS = {
   CHECK_FOR_UPDATES: 'tray:check-for-updates' // Tray: check for updates
 }
 
-// MCP meeting-specific events
-export const MEETING_EVENTS = {
-  INSTRUCTION: 'mcp:meeting-instruction' // Sent from the main process to the renderer process
-}
-
 // Floating button events
 export const FLOATING_BUTTON_EVENTS = {
   CLICKED: 'floating-button:clicked', // Floating button was clicked
diff --git a/src/main/presenter/configPresenter/index.ts b/src/main/presenter/configPresenter/index.ts
index 12a9a7fb9..353f3ac99 100644
--- a/src/main/presenter/configPresenter/index.ts
+++ b/src/main/presenter/configPresenter/index.ts
@@ -102,7 +102,7 @@ interface IAppSettings {
   enableSkills?: boolean // Skills system global toggle
   hooksNotifications?: HooksNotificationsSettings // Hooks & notifications settings
   defaultModel?: { providerId: string; modelId: string } // Default model for new conversations
-  defaultVisionModel?: { providerId: string; modelId: string } // Default vision model for image tools
+  defaultVisionModel?: { providerId: string; modelId: string } // Legacy vision model setting for migration only
   defaultProjectPath?: string | null
   acpRegistryMigrationVersion?: number
   unifiedAgentsMigrationVersion?: number
@@ -153,6 +153,15 @@ const isModelSelection = (value: unknown): value is ModelSelection => {
   return typeof record.providerId === 'string' && typeof record.modelId === 'string'
 }
 
+const normalizeKnownModelId = (modelId: string): string => {
+  const normalizedModelId = modelId.trim().toLowerCase()
+  return normalizedModelId.replace(/^models\//, '')
+}
+
+const normalizeKnownProviderId = (providerId: string): string =>
+  modelCapabilities.resolveProviderId(providerId.trim().toLowerCase()) ||
+  providerId.trim().toLowerCase()
+
 export const getAnthropicModelSelectionKeysToClear = (
   settings: Partial<
     Record<
@@ -362,6 +371,7 @@
   setAgentRepository(agentRepository: AgentRepository): void {
     this.agentRepository = agentRepository
     this.initializeUnifiedAgents()
+    this.migrateLegacyDefaultVisionModelToBuiltinAgent()
   }
 
   private getAgentRepositoryOrThrow(): AgentRepository {
@@ -396,6 +406,35 @@
     this.syncRegistryAgentsToRepository()
   }
 
+  private migrateLegacyDefaultVisionModelToBuiltinAgent(): void {
+    const legacySelection = this.store.get('defaultVisionModel') as unknown
+    if (legacySelection === undefined) {
+      return
+    }
+
+    const builtinVisionModel = this.getBuiltinDeepChatConfig().visionModel
+
+    if (
+      isModelSelection(legacySelection) &&
+      (!builtinVisionModel?.providerId || !builtinVisionModel?.modelId)
+    ) {
+      const providerId = legacySelection.providerId.trim()
+      const modelId = legacySelection.modelId.trim()
+
+      if (providerId && modelId) {
+        this.updateBuiltinDeepChatConfig({
+          visionModel: {
+            providerId,
+            modelId
+          }
+        })
+      }
+    }
+
+    this.store.delete('defaultVisionModel')
+    eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultVisionModel', undefined)
+  }
+
   private buildLegacyBuiltinDeepChatConfig(): DeepChatAgentConfig {
     const defaultModel = this.store.get('defaultModel') as ModelSelection | undefined
     const assistantModel = this.store.get('assistantModel') as ModelSelection | undefined
@@ -760,7 +799,9 @@
     const keysToClear = getAnthropicModelSelectionKeysToClear({
       defaultModel: this.getSetting('defaultModel'),
       assistantModel: this.getSetting('assistantModel'),
-      defaultVisionModel: this.getSetting('defaultVisionModel'),
+      defaultVisionModel: this.store.get('defaultVisionModel') as
+        | { providerId: string; modelId: string }
+        | undefined,
       preferredModel: this.getSetting('preferredModel')
     })
@@ -780,9 +821,6 @@
     if (key === 'assistantModel') {
       return this.getBuiltinDeepChatConfig().assistantModel as T | undefined
     }
-    if (key === 'defaultVisionModel') {
-      return this.getDefaultVisionModel() as T | undefined
-    }
     if (key === 'default_system_prompt') {
       return this.getBuiltinDeepChatConfig().systemPrompt as T | undefined
     }
@@ -808,10 +846,6 @@
       eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, key, value)
       return
     }
-    if (key === 'defaultVisionModel') {
-      this.setDefaultVisionModel(value as { providerId: string; modelId: string } | undefined)
-      return
-    }
     if (key === 'default_system_prompt') {
       this.updateBuiltinDeepChatConfig({
         systemPrompt: typeof value === 'string' ? value : ''
@@ -1015,6 +1049,26 @@
     return this.providerModelHelper.getCustomModels(providerId)
   }
 
+  isKnownModel(providerId: string, modelId: string): boolean {
+    const normalizedProviderId = normalizeKnownProviderId(providerId)
+    const normalizedModelId = normalizeKnownModelId(modelId)
+
+    if (!normalizedProviderId || !normalizedModelId) {
+      return false
+    }
+
+    const hasKnownModel = (models: Array<{ id: string }> | undefined): boolean =>
+      Array.isArray(models) &&
+      models.some((model) => normalizeKnownModelId(model.id) === normalizedModelId)
+
+    return (
+      this.hasUserModelConfig(normalizedModelId, normalizedProviderId) ||
+      hasKnownModel(this.getProviderModels(normalizedProviderId)) ||
+      hasKnownModel(this.getCustomModels(normalizedProviderId)) ||
+      hasKnownModel(this.getDbProviderModels(normalizedProviderId))
+    )
+  }
+
   setCustomModels(providerId: string, models: MODEL_META[]): void {
     this.providerModelHelper.setCustomModels(providerId, models)
   }
@@ -1688,6 +1742,18 @@
     )
   }
 
+  async agentSupportsCapability(agentId: string, capability: 'vision'): Promise<boolean> {
+    if (capability !== 'vision') {
+      return false
+    }
+
+    const agentConfig = await this.resolveDeepChatAgentConfig(agentId)
+    const providerId = agentConfig.visionModel?.providerId?.trim()
+    const modelId = agentConfig.visionModel?.modelId?.trim()
+
+    return Boolean(providerId && modelId && this.getModelConfig(modelId, providerId)?.vision)
+  }
+
   async createDeepChatAgent(input: CreateDeepChatAgentInput): Promise {
     const created = this.getAgentRepositoryOrThrow().createDeepChatAgent(input)
     this.notifyAcpAgentsChanged()
@@ -2312,32 +2378,6 @@
     eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultModel', model)
   }
 
-  getDefaultVisionModel(): { providerId: string; modelId: string } | undefined {
-    const selection = this.getBuiltinDeepChatConfig().visionModel
-    if (selection?.providerId && selection?.modelId) {
-      return {
-        providerId: selection.providerId,
-        modelId: selection.modelId
-      }
-    }
-    return this.store.get('defaultVisionModel') as
-      | { providerId: string; modelId: string }
-      | undefined
-  }
-
-  setDefaultVisionModel(model: { providerId: string; modelId: string } | undefined): void {
-    this.updateBuiltinDeepChatConfig({
-      visionModel:
-        model?.providerId && model?.modelId
-          ? {
-              providerId: model.providerId,
-              modelId: model.modelId
-            }
-          : null
-    })
-    eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultVisionModel', model)
-  }
-
   getDefaultProjectPath(): string | null {
     const path = this.getSetting('defaultProjectPath')
     return path?.trim() ? path.trim() : null
diff --git a/src/main/presenter/configPresenter/mcpConfHelper.ts b/src/main/presenter/configPresenter/mcpConfHelper.ts
index b3c5f0a5b..4fd1a2543 100644
--- a/src/main/presenter/configPresenter/mcpConfHelper.ts
+++ b/src/main/presenter/configPresenter/mcpConfHelper.ts
@@ -165,16 +165,6 @@ const DEFAULT_INMEMORY_SERVERS: Record<string, MCPServerConfig>
     },
     disable: false
   },
-  imageServer: {
-    args: [],
-    descriptions: 'Image processing MCP service',
-    icons: '🖼️',
-    autoApprove: ['read_image_base64', 'read_multiple_images_base64'], // Auto-approve reading, require confirmation for uploads
-    type: 'inmemory' as MCPServerType,
-    command: 'image', // We need to map this command to the ImageServer class later
-    env: {},
-    disable: false
-  },
   ragflowKnowledge: {
     args: [],
     descriptions: 'DeepChat内置RAGFlow知识库检索服务',
@@ -258,16 +248,6 @@
     env: {},
     disable: false
   },
-  'deepchat-inmemory/meeting-server': {
-    args: [],
-    descriptions: 'DeepChat内置会议服务,用于组织多Agent讨论',
-    icons: '👥',
-    autoApprove: ['all'],
-    type: 'inmemory' as MCPServerType,
-    command: 'deepchat-inmemory/meeting-server',
-    env: {},
-    disable: false
-  },
   // Merge platform-specific services
   ...PLATFORM_SPECIFIC_SERVERS
 }
@@ -384,15 +364,35 @@
   private removeDeprecatedBuiltInServers(
     servers: Record<string, MCPServerConfig>
   ): Record<string, MCPServerConfig> {
-    const deprecatedBuiltInServers = ['powerpack']
+    const deprecatedBuiltInServers = [
+      'powerpack',
+      'deepchat-inmemory/meeting-server',
+      'imageServer'
+    ]
+    let hasChanges = false
+    const removedBuiltInServers = new Set(this.getRemovedBuiltInServers())
+    let removedListChanged = false
 
     for (const serverName of deprecatedBuiltInServers) {
       if (servers[serverName]) {
         console.log(`Removing deprecated built-in MCP service: ${serverName}`)
         delete servers[serverName]
+        hasChanges = true
+      }
+
+      if (removedBuiltInServers.delete(serverName)) {
+        removedListChanged = true
       }
     }
 
+    if (hasChanges) {
+      this.mcpStore.set('mcpServers', servers)
+    }
+
+    if (removedListChanged) {
+      this.setRemovedBuiltInServers(Array.from(removedBuiltInServers))
+    }
+
     return servers
   }
@@ -913,15 +913,9 @@
     }
 
     try {
-      const mcpServers = this.mcpStore.get('mcpServers') || {}
-
-      if (mcpServers.powerpack) {
-        console.log('Removing deprecated powerpack MCP server')
-        delete mcpServers.powerpack
-        this.mcpStore.set('mcpServers', mcpServers)
-      }
+      this.removeDeprecatedBuiltInServers(this.mcpStore.get('mcpServers') || {})
     } catch (error) {
-      console.error('Error occurred while removing deprecated powerpack server:', error)
+      console.error('Error occurred while removing deprecated built-in MCP servers:', error)
     }
 
     // After the upgrade, check for and add platform-specific services
diff --git a/src/main/presenter/deepchatAgentPresenter/dispatch.ts b/src/main/presenter/deepchatAgentPresenter/dispatch.ts
index 6874379db..37fdd809e 100644
--- a/src/main/presenter/deepchatAgentPresenter/dispatch.ts
+++ b/src/main/presenter/deepchatAgentPresenter/dispatch.ts
@@ -679,6 +679,20 @@ export async function executeTools(
       }
     }
 
+    if (hooks?.normalizeToolResult) {
+      toolRawData = {
+        ...toolRawData,
+        content: await hooks.normalizeToolResult({
+          sessionId: io.sessionId,
+          toolCallId: tc.id,
+          toolName: tc.name,
+          toolArgs: tc.arguments,
+          content: toolRawData.content,
+          isError: toolRawData.isError === true
+        })
+      }
+    }
+
     const searchPayload = extractSearchPayload(
       toolRawData.content,
       toolContext.name,
diff --git a/src/main/presenter/deepchatAgentPresenter/index.ts b/src/main/presenter/deepchatAgentPresenter/index.ts
index 6971a592e..eaf527c24 100644
--- a/src/main/presenter/deepchatAgentPresenter/index.ts
+++ b/src/main/presenter/deepchatAgentPresenter/index.ts
@@ -52,6 +52,7 @@
 import { ToolOutputGuard } from './toolOutputGuard'
 import type { ProviderRequestTracePayload } from '../llmProviderPresenter/requestTrace'
 import type { NewSessionHooksBridge } from '../hooksNotifications/newSessionBridge'
 import { providerDbLoader } from '../configPresenter/providerDbLoader'
+import { resolveSessionVisionTarget } from '../vision/sessionVisionResolver'
 
 type PendingInteractionEntry = {
   interaction: PendingToolInteraction
@@ -107,6 +108,16 @@ const isReasoningEffort = (value: unknown): value is 'minimal' | 'low' | 'medium
 const isVerbosity = (value: unknown): value is 'low' | 'medium' | 'high' =>
   value === 'low' || value === 'medium' || value === 'high'
 
+const createAbortError = (): Error => {
+  if (typeof DOMException !== 'undefined') {
+    return new DOMException('Aborted', 'AbortError')
+  }
+
+  const error = new Error('Aborted')
+  error.name = 'AbortError'
+  return error
+}
+
 export class DeepChatAgentPresenter implements IAgentImplementation {
   private readonly llmProviderPresenter: ILlmProviderPresenter
   private readonly configPresenter: IConfigPresenter
@@ -1011,6 +1022,23 @@
     return undefined
   }
 
+  private getAbortSignalForSession(sessionId: string): AbortSignal | undefined {
+    return (
+      this.activeGenerations.get(sessionId)?.abortController.signal ??
+      this.abortControllers.get(sessionId)?.signal
+    )
+  }
+
+  private throwIfAbortRequested(signal?: AbortSignal): void {
+    if (signal?.aborted) {
+      throw createAbortError()
+    }
+  }
+
+  private isAbortError(error: unknown): boolean {
+    return error instanceof Error && (error.name === 'AbortError' || error.name === 'CanceledError')
+  }
+
   private dispatchResolvedToolHook(params: {
     sessionId: string
     messageId: string
@@ -1424,7 +1452,17 @@
             body: gap
           }
         })
-      }
+      },
+      normalizeToolResult: async (tool) =>
+        await this.normalizeToolResultContent({
+          sessionId: tool.sessionId,
+          toolCallId: tool.toolCallId,
+          toolName: tool.toolName,
+          toolArgs: tool.toolArgs,
+          content: tool.content,
+          isError: tool.isError,
+          abortSignal: abortController.signal
+        })
     },
     io: {
       sessionId,
@@ -2867,7 +2905,16 @@
         permissionRequest: rawData.permissionRequest as PendingToolInteraction['permission']
       }
     }
-    const responseText = this.toolContentToText(rawData.content)
+    const normalizedContent = await this.normalizeToolResultContent({
+      sessionId,
+      toolCallId: toolCall.id || '',
+      toolName,
+      toolArgs: toolCall.params || '{}',
+      content: rawData.content,
+      isError: rawData.isError === true,
+      abortSignal: this.getAbortSignalForSession(sessionId)
+    })
+    const responseText = this.toolContentToText(normalizedContent)
     const prepared = await this.toolOutputGuard.prepareToolOutput({
       sessionId,
       toolCallId: toolCall.id || '',
@@ -2956,6 +3003,199 @@
       })
   }
 
+  private async normalizeToolResultContent(params: {
+    sessionId: string
+    toolCallId: string
+    toolName: string
+    toolArgs: string
+    content: MCPToolResponse['content']
+    isError: boolean
+    abortSignal?: AbortSignal
+  }): Promise<MCPToolResponse['content']> {
+    if (params.isError) {
+      return params.content
+    }
+
+    const abortSignal = params.abortSignal ?? this.getAbortSignalForSession(params.sessionId)
+    const screenshotPayload = this.extractScreenshotToolPayload(
+      params.toolName,
+      params.toolArgs,
+      params.content
+    )
+    if (!screenshotPayload) {
+      return params.content
+    }
+
+    try {
+      this.throwIfAbortRequested(abortSignal)
+      const visionModel = await this.resolveScreenshotVisionModel(params.sessionId, abortSignal)
+      this.throwIfAbortRequested(abortSignal)
+
+      if (!visionModel) {
+        return 'Screenshot captured, but automatic English analysis is unavailable because neither the current session model nor the agent vision model can analyze images.'
+      }
+
+      const messages: ChatMessage[] = [
+        {
+          role: 'user',
+          content: [
+            {
+              type: 'text',
+              text: this.buildScreenshotAnalysisPrompt()
+            },
+            {
+              type: 'image_url',
+              image_url: {
+                url: screenshotPayload.dataUrl,
+                detail: 'auto'
+              }
+            }
+          ]
+        }
+      ]
+
+      const modelConfig = this.configPresenter.getModelConfig(
+        visionModel.modelId,
+        visionModel.providerId
+      )
+      const response = await this.llmProviderPresenter.generateCompletionStandalone(
+        visionModel.providerId,
+        messages,
+        visionModel.modelId,
+        modelConfig?.temperature ?? 0.2,
+        Math.min(modelConfig?.maxTokens ?? 900, 900),
+        abortSignal ? { signal: abortSignal } : undefined
+      )
+      this.throwIfAbortRequested(abortSignal)
+      const normalized = response.trim()
+      if (!normalized) {
+        return 'Screenshot captured, but automatic English analysis returned no usable description.'
+      }
+      return normalized
+    } catch (error) {
+      if (this.isAbortError(error)) {
+        return 'Screenshot captured, but automatic English analysis was canceled.'
+      }
+
+      const message = error instanceof Error ? error.message : String(error)
+      console.warn('[DeepChatAgent] Failed to normalize screenshot tool output:', {
+        sessionId: params.sessionId,
+        toolCallId: params.toolCallId,
+        error: message
+      })
+      return `Screenshot captured, but automatic English analysis failed: ${message}`
+    }
+  }
+
+  private extractScreenshotToolPayload(
+    toolName: string,
+    toolArgs: string,
+    content: MCPToolResponse['content']
+  ): { dataUrl: string } | null {
+    if (toolName !== 'cdp_send' || typeof content !== 'string') {
+      return null
+    }
+
+    const parsedArgs = this.parseJsonRecord(toolArgs)
+    if (!parsedArgs || parsedArgs.method !== 'Page.captureScreenshot') {
+      return null
+    }
+
+    const parsedContent = this.parseJsonRecord(content)
+    const rawData = typeof parsedContent?.data === 'string' ? parsedContent.data.trim() : ''
+    if (!rawData) {
+      return null
+    }
+
+    const screenshotParams = this.normalizeJsonRecord(parsedArgs.params)
+    const mimeType = this.resolveScreenshotMimeType(screenshotParams?.format)
+    const dataUrl = rawData.startsWith('data:image/')
+      ? rawData
+      : `data:${mimeType};base64,${rawData}`
+
+    return { dataUrl }
+  }
+
+  private normalizeJsonRecord(value: unknown): Record<string, unknown> | null {
+    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
+      return value as Record<string, unknown>
+    }
+
+    if (typeof value !== 'string' || !value.trim()) {
+      return null
+    }
+
+    return this.parseJsonRecord(value)
+  }
+
+  private parseJsonRecord(value: string): Record<string, unknown> | null {
+    try {
+      const parsed = JSON.parse(value) as unknown
+      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+        return parsed as Record<string, unknown>
+      }
+    } catch {}
+
+    return null
+  }
+
+  private resolveScreenshotMimeType(format: unknown): string {
+    if (format === 'jpeg') {
+      return 'image/jpeg'
+    }
+    if (format === 'webp') {
+      return 'image/webp'
+    }
+    return 'image/png'
+  }
+
+  private async resolveScreenshotVisionModel(
+    sessionId: string,
+    abortSignal?: AbortSignal
+  ): Promise<{ providerId: string; modelId: string } | null> {
+    this.throwIfAbortRequested(abortSignal)
+    const state = this.runtimeState.get(sessionId)
+    const dbSession = this.sessionStore.get(sessionId)
+    const agentId = this.getSessionAgentId(sessionId) ?? 'deepchat'
+    const resolved = await resolveSessionVisionTarget({
+      providerId: state?.providerId ?? dbSession?.provider_id,
+      modelId: state?.modelId ?? dbSession?.model_id,
+      agentId,
+      configPresenter: this.configPresenter,
+      signal: abortSignal,
+      logLabel: `screenshot:${sessionId}`
+    })
+    this.throwIfAbortRequested(abortSignal)
+
+    if (!resolved) {
+      return null
+    }
+
+    if (resolved.source === 'agent-vision-model') {
+      const agentSupportsVision =
+        (await this.configPresenter.agentSupportsCapability?.(agentId, 'vision')) === true
+      this.throwIfAbortRequested(abortSignal)
+      if (!agentSupportsVision) {
+        return null
+      }
+    }
+
+    return {
+      providerId: resolved.providerId,
+      modelId: resolved.modelId
+    }
+  }
+
+  private buildScreenshotAnalysisPrompt(): string {
+    return [
+      'Analyze this browser screenshot and respond in English only.',
+      'Describe only what is clearly visible.',
+      'Include the page type or layout, the most important visible text, interactive controls, status indicators, warnings, errors, and any detail that matters for the next browser action.',
+      'Do not speculate about hidden or unreadable content.',
+      'Return detailed plain text in a single paragraph.'
+    ].join('\n')
+  }
+
   private toolContentToText(content: MCPToolResponse['content']): string {
     if (typeof content === 'string') {
       return content
diff --git a/src/main/presenter/deepchatAgentPresenter/types.ts b/src/main/presenter/deepchatAgentPresenter/types.ts
index 785ae76e1..df53d0936 100644
--- a/src/main/presenter/deepchatAgentPresenter/types.ts
+++ b/src/main/presenter/deepchatAgentPresenter/types.ts
@@ -6,7 +6,7 @@ import type {
 } from '@shared/types/agent-interface'
 import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events'
 import type { ChatMessage } from '@shared/types/core/chat-message'
-import type { MCPToolDefinition } from '@shared/types/core/mcp'
+import type { MCPToolDefinition, MCPToolResponse } from '@shared/types/core/mcp'
 import type { ModelConfig } from '@shared/presenter'
 import type { IToolPresenter } from '@shared/types/presenters/tool.presenter'
 import type { DeepChatMessageStore } from './messageStore'
@@ -76,6 +76,14 @@
     reasoningContentLength: number
     toolCallCount: number
   }) => void
+  normalizeToolResult?: (tool: {
+    sessionId: string
+    toolCallId: string
+    toolName: string
+    toolArgs: string
+    content: MCPToolResponse['content']
+    isError: boolean
+  }) => Promise<MCPToolResponse['content']>
 }
 
 export interface PendingToolInteraction {
diff --git a/src/main/presenter/index.ts b/src/main/presenter/index.ts
index 6d513c108..185eb9ccb 100644
--- a/src/main/presenter/index.ts
+++ b/src/main/presenter/index.ts
@@ -259,6 +259,18 @@
         return null
       },
+      resolveConversationSessionInfo: async (conversationId) => {
+        const session = await this.newAgentPresenter?.getSession(conversationId)
+        if (!session) {
+          return null
+        }
+
+        return {
+          agentId: session.agentId,
+          providerId: session.providerId,
+          modelId: session.modelId
+        }
+      },
       getSkillPresenter: () => this.skillPresenter,
       getYoBrowserToolHandler: () => this.yoBrowserPresenter.toolHandler,
       getFilePresenter: () => ({
diff --git a/src/main/presenter/llmProviderPresenter/index.ts b/src/main/presenter/llmProviderPresenter/index.ts
index d5caa1eaf..8cd0ca95c 100644
--- a/src/main/presenter/llmProviderPresenter/index.ts
+++ b/src/main/presenter/llmProviderPresenter/index.ts
@@ -33,6 +33,16 @@
 import { AcpSessionPersistence } from './acp'
 import { AcpProvider } from './providers/acpProvider'
 import type { ProviderMcpRuntimePort } from './runtimePorts'
 
+const createAbortError = (): Error => {
+  if (typeof DOMException !== 'undefined') {
+    return new DOMException('Aborted', 'AbortError')
+  }
+
+  const error = new Error('Aborted')
+  error.name = 'AbortError'
+  return error
+}
+
 export class LLMProviderPresenter implements ILlmProviderPresenter {
   private currentProviderId: string | null = null
   private readonly activeStreams: Map = new Map()
@@ -258,16 +268,37 @@
     messages: ChatMessage[],
     modelId: string,
     temperature?: number,
-    maxTokens?: number
+    maxTokens?: number,
+    options?: { signal?: AbortSignal }
   ): Promise<string> {
     const provider = this.getProviderInstance(providerId)
     let response = ''
+    const signal = options?.signal
+
+    if (signal?.aborted) {
+      throw createAbortError()
+    }
+
+    const completionPromise = provider.completions(messages, modelId, temperature, maxTokens)
+    const abortPromise =
+      signal &&
+      new Promise<never>((_, reject) => {
+        const onAbort = () => reject(createAbortError())
+        signal.addEventListener('abort', onAbort, { once: true })
+        completionPromise.finally(() => signal.removeEventListener('abort', onAbort))
+      })
+
     try {
-      const llmResponse = await provider.completions(messages, modelId, temperature, maxTokens)
+      const llmResponse = await (abortPromise
+        ? Promise.race([completionPromise, abortPromise])
+        : completionPromise)
       response = llmResponse.content
       return response
     } catch (error) {
+      if (signal?.aborted || (error instanceof Error && error.name === 'AbortError')) {
+        throw error
+      }
       console.error('Stream error:', error)
       return ''
     }
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts b/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
index 9a02e88fb..62272a788 100644
--- a/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
+++ b/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
@@ -2,21 +2,19 @@
 import { ArtifactsServer } from './artifactsServer'
 // FileSystemServer has been removed - filesystem capabilities are now provided via Agent tools
 import { BochaSearchServer } from './bochaSearchServer'
 import { BraveSearchServer } from './braveSearchServer'
-import { ImageServer } from './imageServer'
 import { DifyKnowledgeServer } from './difyKnowledgeServer'
 import { RagflowKnowledgeServer } from './ragflowKnowledgeServer'
 import { FastGptKnowledgeServer } from './fastGptKnowledgeServer'
 import { DeepResearchServer } from './deepResearchServer'
 import { AutoPromptingServer } from './autoPromptingServer'
 import { ConversationSearchServer } from './conversationSearchServer'
-import { MeetingServer } from './meetingServer'
 import { BuiltinKnowledgeServer } from './builtinKnowledgeServer'
 import { BuiltinKnowledgeConfig } from '@shared/presenter'
 import { AppleServer } from './appleServer'
 
 export function getInMemoryServer(
   serverName: string,
-  args: string[],
+  _args: string[],
   env?: Record<string, unknown>
 ) {
   switch (serverName) {
@@ -29,8 +27,6 @@
       return new BraveSearchServer(env)
     case 'deepResearch':
       return new DeepResearchServer(env)
-    case 'imageServer':
-      return new ImageServer(args[0] || undefined, args[1] || undefined)
     case 'difyKnowledge':
       return new DifyKnowledgeServer(
         env as {
@@ -79,8 +75,6 @@
       return new AutoPromptingServer()
     case 'deepchat-inmemory/conversation-search-server':
       return new ConversationSearchServer()
-    case 'deepchat-inmemory/meeting-server':
-      return new MeetingServer()
     case 'deepchat/apple-server':
       // Only create the AppleServer on macOS
       if (process.platform !== 'darwin') {
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts
deleted file mode 100644
index 370acb6eb..000000000
--- a/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts
+++ /dev/null
@@ -1,479 +0,0 @@
-import { Server } from '@modelcontextprotocol/sdk/server/index.js'
-import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'
-import fs from 'fs/promises'
-import path from 'path'
-import { z } from 'zod'
-import { zodToJsonSchema } from 'zod-to-json-schema'
-import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'
-import { presenter } from '@/presenter'
-import { ChatMessage, ChatMessageContent } from '@shared/presenter'
-// import { GenerateCompletionOptions } from '@/presenter/llmProviderPresenter' // Assuming this path and type exist - using any for now
-
-// --- Zod Schemas for Tool Arguments ---
-
-const ReadImageBase64ArgsSchema = z.object({
-  path: z.string().describe('Path to the image file.')
-})
-
-const
UploadImageArgsSchema = z.object({ - path: z.string().describe('Path to the image file to upload.') -}) - -const ReadMultipleImagesBase64ArgsSchema = z.object({ - paths: z.array(z.string()).describe('List of paths to the image files.') -}) - -const UploadMultipleImagesArgsSchema = z.object({ - paths: z.array(z.string()).describe('List of paths to the image files to upload.') -}) - -const QueryImageWithPromptArgsSchema = z.object({ - path: z.string().describe('Path to the image file to query.'), - prompt: z - .string() - .describe('The prompt to use when querying the image with the multimodal model.') -}) - -const DescribeImageArgsSchema = z.object({ - path: z.string().describe('Path to the image file to do simple describe.') -}) - -const OcrImageArgsSchema = z.object({ - path: z.string().describe('Path to the image file for OCR text extraction.') -}) - -// --- Image Server Implementation --- - -export class ImageServer { - private server: Server - private provider: string - private model: string - - constructor(provider?: string, model?: string) { - const defaultVisionModel = presenter.configPresenter.getDefaultVisionModel() - this.provider = provider || defaultVisionModel?.providerId || 'openai' - this.model = model || defaultVisionModel?.modelId || 'gpt-4o' - this.server = new Server( - { - name: 'image-processing-server', - version: '0.1.0' - }, - { - capabilities: { - tools: {} - } - } - ) - this.setupRequestHandlers() - } - - // No specific initialization needed for now, but can be added for upload service config - // public async initialize(): Promise { - // // Initialization logic, e.g., configure upload service client - // } - - private getEffectiveModel(): { provider: string; model: string } { - if (this.provider && this.model) { - return { provider: this.provider, model: this.model } - } - - const defaultVisionModel = presenter.configPresenter.getDefaultVisionModel() - if (defaultVisionModel?.providerId && defaultVisionModel?.modelId) { - return { provider: defaultVisionModel.providerId, model: defaultVisionModel.modelId } - } - - throw new Error( - 'No vision model configured. Please set a default vision model in Settings > Common > Default Model.' - ) - } - - public startServer(transport: Transport): void { - this.server.connect(transport) - } - - // --- Placeholder for Image Upload Logic --- - private async uploadImageToService(filePath: string, fileBuffer: Buffer): Promise { - // TODO: Implement actual image upload logic here - // This might involve using a library like 'axios' or a specific SDK - // for services like Imgur, AWS S3, Cloudinary, etc. 
- console.log(`Uploading ${filePath} (size: ${fileBuffer.length} bytes)...`) - // Replace with actual upload call - await new Promise((resolve) => setTimeout(resolve, 500)) // Simulate network delay - const fakeUrl = `https://fake-upload-service.com/uploads/${path.basename(filePath)}_${Date.now()}` - console.log(`Upload complete: ${fakeUrl}`) - return fakeUrl - } - - // --- Placeholder for Multimodal Model Interaction --- - private async queryImageWithModel( - filePath: string, - fileBuffer: Buffer, - prompt: string - ): Promise { - const { provider, model } = this.getEffectiveModel() - // TODO: Implement actual API call to a multimodal model (e.g., GPT-4o, Gemini) - console.log( - `Querying ${filePath} (size: ${fileBuffer.length} bytes) using ${provider}/${model} with prompt: "${prompt}"...` - ) - - // Construct the messages array for the multimodal model - const base64Image = fileBuffer.toString('base64') - // TODO: Dynamically determine mime type if possible, otherwise assume common type like jpeg - const dataUrl = `data:image/jpeg;base64,${base64Image}` - - const messages: ChatMessage[] = [ - { - role: 'user', - content: [ - { type: 'text', text: prompt }, // Use the provided prompt - { - type: 'image_url', - image_url: { url: dataUrl } - } - ] as ChatMessageContent[] // Type assertion might be needed depending on ChatMessageContent definition - } - ] - - const modelConfig = presenter.configPresenter.getModelConfig(model, provider) - - try { - const response = await presenter.llmproviderPresenter.generateCompletionStandalone( - provider, - messages, - model, - modelConfig?.temperature ?? 0.6, - modelConfig?.maxTokens || 1000 - ) - console.log(`Model response received: ${response}`) - return response ?? 'No response generated.' // Handle potential null/undefined response - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - console.error(`Error querying image: ${errorMessage}`) - // Re-throw or return an error message - throw new Error(`Failed to query image: ${errorMessage}`) - // Or return `Error generating response: ${errorMessage}`; - } - } - - private async ocrImageWithModel(filePath: string, fileBuffer: Buffer): Promise { - const { provider, model } = this.getEffectiveModel() - // TODO: Implement actual API call to an OCR service or a multimodal model capable of OCR - console.log( - `Requesting OCR for ${filePath} (size: ${fileBuffer.length} bytes) using ${provider}/${model}...` - ) - - // Construct the messages array for the multimodal model - const base64Image = fileBuffer.toString('base64') - // TODO: Dynamically determine mime type if possible - const dataUrl = `data:image/jpeg;base64,${base64Image}` - - const messages: ChatMessage[] = [ - { - role: 'user', - content: [ - { type: 'text', text: 'Perform OCR on this image and return the extracted text.' }, - { - type: 'image_url', - image_url: { url: dataUrl } - } - ] as ChatMessageContent[] // Type assertion - } - ] - - console.log(messages) - - const modelConfig = presenter.configPresenter.getModelConfig(model, provider) - - try { - const ocrText = await presenter.llmproviderPresenter.generateCompletionStandalone( - provider, - messages, - model, - modelConfig?.temperature ?? 0.6, - modelConfig?.maxTokens || 1000 - ) - console.log(`OCR text received: ${ocrText}`) - return ocrText ?? 'No text extracted.' // Handle potential null/undefined response - } catch (error) { - const errorMessage = error instanceof Error ? 
error.message : String(error) - console.error(`Error performing OCR: ${errorMessage}`) - // Re-throw or return an error message - throw new Error(`Failed to perform OCR: ${errorMessage}`) - // Or return `Error performing OCR: ${errorMessage}`; - } - } - - // --- Request Handlers --- - - private setupRequestHandlers(): void { - // List Tools Handler - this.server.setRequestHandler(ListToolsRequestSchema, async () => { - return { - tools: [ - { - name: 'read_image_base64', - description: - 'Reads an image file from the specified path and returns its base64 encoded content.', - inputSchema: zodToJsonSchema(ReadImageBase64ArgsSchema), - annotations: { - title: 'Read Image Base64', - readOnlyHint: true - } - }, - { - name: 'upload_image', - description: - 'Uploads an image file from the specified path to a hosting service and returns the public URL.', - inputSchema: zodToJsonSchema(UploadImageArgsSchema), - annotations: { - title: 'Upload Image', - destructiveHint: false, - openWorldHint: true - } - }, - { - name: 'read_multiple_images_base64', - description: - 'Reads multiple image files from the specified paths and returns their base64 encoded content.', - inputSchema: zodToJsonSchema(ReadMultipleImagesBase64ArgsSchema), - annotations: { - title: 'Read Multiple Images Base64', - readOnlyHint: true - } - }, - { - name: 'upload_multiple_images', - description: - 'Uploads multiple image files from the specified paths to a hosting service and returns their public URLs.', - inputSchema: zodToJsonSchema(UploadMultipleImagesArgsSchema), - annotations: { - title: 'Upload Multiple Images', - destructiveHint: false, - openWorldHint: true - } - }, - { - name: 'describe_image', - description: - 'Uses a multimodal model to simply describe the image at the specified path.', - inputSchema: zodToJsonSchema(DescribeImageArgsSchema), - annotations: { - title: 'Describe Image', - readOnlyHint: true, - openWorldHint: true - } - }, - { - name: 'query_image_with_prompt', - description: - 'Uses a multimodal model to answer a query (prompt) about the image at the specified path.', - inputSchema: zodToJsonSchema(QueryImageWithPromptArgsSchema), - annotations: { - title: 'Query Image with Prompt', - readOnlyHint: true, - openWorldHint: true - } - }, - { - name: 'ocr_image', - description: - 'Performs Optical Character Recognition (OCR) on the image at the specified path and returns the extracted text.', - inputSchema: zodToJsonSchema(OcrImageArgsSchema), - annotations: { - title: 'OCR Image', - readOnlyHint: true, - openWorldHint: true - } - } - ] - } - }) - - // Call Tool Handler - this.server.setRequestHandler(CallToolRequestSchema, async (request) => { - try { - const { name, arguments: args } = request.params - - switch (name) { - case 'read_image_base64': { - const parsed = ReadImageBase64ArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - // TODO: Implement path validation if necessary (similar to FileSystemServer) - const filePath = parsed.data.path - const fileBuffer = await fs.readFile(filePath) - const base64Content = fileBuffer.toString('base64') - // Determine mime type (optional but good practice) - // const mimeType = lookup(filePath) || 'application/octet-stream'; - // const dataUri = `data:${mimeType};base64,${base64Content}`; - return { - content: [{ type: 'text', text: base64Content }] // Or return dataUri - } - } - - case 'upload_image': { - const parsed = UploadImageArgsSchema.safeParse(args) - if (!parsed.success) { - throw 
new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - // TODO: Implement path validation if necessary - const filePath = parsed.data.path - const fileBuffer = await fs.readFile(filePath) - const imageUrl = await this.uploadImageToService(filePath, fileBuffer) - return { - content: [{ type: 'text', text: imageUrl }] - } - } - - case 'read_multiple_images_base64': { - const parsed = ReadMultipleImagesBase64ArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - const results = await Promise.allSettled( - parsed.data.paths.map(async (filePath: string) => { - try { - // TODO: Implement path validation if necessary - const fileBuffer = await fs.readFile(filePath) - return { - path: filePath, - base64: fileBuffer.toString('base64'), - status: 'fulfilled' - } - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - // Ensure the structure includes path and error for rejected promises - return Promise.reject({ path: filePath, error: errorMessage }) - } - }) - ) - - // Format output: [{path: string, base64?: string, error?: string}] - const formattedResults = results.map((result) => { - if (result.status === 'fulfilled') { - return { path: result.value.path, base64: result.value.base64 } - } else { - // Access reason directly as it contains the rejected structure - return { path: result.reason.path, error: result.reason.error } - } - }) - - return { - content: [{ type: 'text', text: JSON.stringify(formattedResults, null, 2) }] - } - } - - case 'upload_multiple_images': { - const parsed = UploadMultipleImagesArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - - const results = await Promise.allSettled( - parsed.data.paths.map(async (filePath: string) => { - try { - // TODO: Implement path validation if necessary - const fileBuffer = await fs.readFile(filePath) - const url = await this.uploadImageToService(filePath, fileBuffer) - return { path: filePath, url: url, status: 'fulfilled' } - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - // Ensure the structure includes path and error for rejected promises - return Promise.reject({ path: filePath, error: errorMessage }) - } - }) - ) - - // Format output: [{path: string, url?: string, error?: string}] - const formattedResults = results.map((result) => { - if (result.status === 'fulfilled') { - return { path: result.value.path, url: result.value.url } - } else { - // Access reason directly as it contains the rejected structure - return { path: result.reason.path, error: result.reason.error } - } - }) - - return { - content: [{ type: 'text', text: JSON.stringify(formattedResults, null, 2) }] - } - } - - case 'describe_image': { - const parsed = DescribeImageArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - // TODO: Implement path validation if necessary - const filePath = parsed.data.path - const fileBuffer = await fs.readFile(filePath) - const description = await this.queryImageWithModel( - filePath, - fileBuffer, - 'Describe this image.' 
- ) - return { - content: [{ type: 'text', text: description }] - } - } - - case 'query_image_with_prompt': { - const parsed = QueryImageWithPromptArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - // TODO: Implement path validation if necessary - const filePath = parsed.data.path - const prompt = parsed.data.prompt // Get the prompt - const fileBuffer = await fs.readFile(filePath) - // Call the renamed function with the prompt - const response = await this.queryImageWithModel(filePath, fileBuffer, prompt) - return { - content: [{ type: 'text', text: response }] - } - } - - case 'ocr_image': { - const parsed = OcrImageArgsSchema.safeParse(args) - if (!parsed.success) { - throw new Error(`Invalid arguments for ${name}: ${parsed.error}`) - } - // TODO: Implement path validation if necessary - const filePath = parsed.data.path - const fileBuffer = await fs.readFile(filePath) - const ocrText = await this.ocrImageWithModel(filePath, fileBuffer) - return { - content: [{ type: 'text', text: ocrText }] - } - } - - default: - throw new Error(`Unknown tool: ${name}`) - } - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - // Consider logging the error server-side - console.error(`Error processing tool call: ${errorMessage}`) - // Ensure the error response structure matches expected format - return { - content: [{ type: 'text', text: `Error: ${errorMessage}` }], - isError: true // Indicate this is an error response - } - } - }) - } -} - -// --- Usage Example (similar to FileSystemServer) --- -// import { WebSocketServerTransport } from '@modelcontextprotocol/sdk/transport/node'; -// -// const imageServer = new ImageServer('your-llm-provider', 'your-multimodal-model'); -// // await imageServer.initialize(); // If initialization is added -// -// // Example using WebSocket transport -// const transport = new WebSocketServerTransport({ port: 8081 }); // Choose a different port -// imageServer.startServer(transport); -// console.log('ImageServer started on port 8081'); - -// You would need a client to connect to this server and call the tools. diff --git a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts index 06583badb..8426c987f 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts @@ -20,6 +20,7 @@ import { } from './chatSettingsTools' import type { AgentToolRuntimePort } from '../runtimePorts' import { YO_BROWSER_TOOL_NAMES } from '../../browser/YoBrowserToolDefinitions' +import { resolveSessionVisionTarget } from '../../vision/sessionVisionResolver' // Consider moving to a shared handlers location in future refactoring import { @@ -433,7 +434,7 @@ export class AgentToolManager { function: { name: 'read', description: - "Read the contents of a file. Supports pagination via offset/limit for large files (auto-truncated at 4500 chars if not specified). When invoked from a skill context with relative paths, provide base_directory as the skill's root directory.", + "Read the contents of a file. Supports pagination via offset/limit for large files (auto-truncated at 4500 chars if not specified). For image files, returns an English description of visible content instead of raw pixels. 
When invoked from a skill context with relative paths, provide base_directory as the skill's root directory.",
         parameters: zodToJsonSchema(schemas.read) as {
           type: string
           properties: Record<string, unknown>
@@ -721,7 +722,7 @@
     if (this.isImageMimeType(mimeType)) {
       return {
-        content: await this.readImageWithVisionFallback(validPath, mimeType)
+        content: await this.readImageWithVisionFallback(validPath, mimeType, conversationId)
       }
     }
 
@@ -1063,13 +1064,28 @@
     return lines.join('\n')
   }
 
-  private async readImageWithVisionFallback(filePath: string, mimeType: string): Promise<string> {
+  private async readImageWithVisionFallback(
+    filePath: string,
+    mimeType: string,
+    conversationId?: string
+  ): Promise<string> {
     const fileBuffer = await fs.promises.readFile(filePath)
     const metadata = this.buildImageMetadataBlock(filePath, mimeType, fileBuffer.length)
-    const defaultVisionModel = this.configPresenter.getDefaultVisionModel?.()
+    let visionTarget: Awaited<ReturnType<typeof resolveSessionVisionTarget>>
+
+    try {
+      visionTarget = await this.resolveVisionTargetForConversation(conversationId)
+    } catch (error) {
+      logger.warn('[AgentToolManager] Failed to resolve vision target for image read:', {
+        conversationId,
+        filePath,
+        error
+      })
+      throw error
+    }
 
-    if (!defaultVisionModel?.providerId || !defaultVisionModel?.modelId) {
-      return `${metadata}\n\nNo defaultVisionModel configured, downgraded to metadata.`
+    if (!visionTarget) {
+      return `${metadata}\n\nImage analysis unavailable because neither the current session model nor the agent vision model can analyze images.`
     }
 
     try {
@@ -1080,12 +1096,7 @@
         content: [
           {
             type: 'text',
-            text: [
-              'Analyze this image and return exactly two sections.',
-              'Section 1 title: OCR',
-              'Section 2 title: Summary',
-              'Keep OCR as faithful extracted text and Summary concise.'
-            ].join('\n')
+            text: this.buildImageAnalysisPrompt()
           },
           {
             type: 'image_url',
@@ -1096,28 +1107,61 @@
       ]
 
       const modelConfig = this.configPresenter.getModelConfig(
-        defaultVisionModel.modelId,
-        defaultVisionModel.providerId
+        visionTarget.modelId,
+        visionTarget.providerId
       )
       const response = await this.getLlmProviderPresenter().generateCompletionStandalone(
-        defaultVisionModel.providerId,
+        visionTarget.providerId,
         messages,
-        defaultVisionModel.modelId,
+        visionTarget.modelId,
         modelConfig?.temperature ?? 0.2,
         modelConfig?.maxTokens ?? 1200
       )
 
       const normalized = (response || '').trim()
       if (!normalized) {
-        return `${metadata}\n\nOCR:\n\nSummary:\nNo result returned by vision model.`
+        return `${metadata}\n\nImage analysis returned no usable description.`
       }
-      return normalized.startsWith('OCR:') ? normalized : `OCR:\n\nSummary:\n${normalized}`
+      return normalized
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error)
       return `${metadata}\n\nVision analysis failed, downgraded to metadata.\nerror: ${message}`
     }
   }
 
+  private async resolveVisionTargetForConversation(conversationId?: string) {
+    if (!conversationId) {
+      return null
+    }
+
+    try {
+      const sessionInfo = await this.runtimePort.resolveConversationSessionInfo(conversationId)
+      return await resolveSessionVisionTarget({
+        providerId: sessionInfo?.providerId,
+        modelId: sessionInfo?.modelId,
+        agentId: sessionInfo?.agentId,
+        configPresenter: this.configPresenter,
+        logLabel: `read:${conversationId}`
+      })
+    } catch (error) {
+      if (this.isConversationNotFoundError(error)) {
+        return null
+      }
+
+      throw error
+    }
+  }
+
+  private buildImageAnalysisPrompt(): string {
+    return [
+      'Analyze this image and respond in English only.',
+      'Describe only what is clearly visible.',
+      'Include the main subject, scene or layout, any legible text, UI elements if present, status indicators, warnings, errors, and any detail that matters for understanding the image.',
+      'Do not speculate about hidden or unreadable content.',
+      'Return detailed plain text in a single paragraph.'
+    ].join('\n')
+  }
+
   private assertWritePermission(
     toolName: string,
     args: Record<string, unknown>,
diff --git a/src/main/presenter/toolPresenter/index.ts b/src/main/presenter/toolPresenter/index.ts
index 71dcf250c..fc03ca324 100644
--- a/src/main/presenter/toolPresenter/index.ts
+++ b/src/main/presenter/toolPresenter/index.ts
@@ -377,6 +377,11 @@
         'Use `background: true` when you know a command should detach immediately; otherwise a foreground `exec` may yield a running `sessionId` after `yieldMs`.'
       )
     }
+    if (toolNames.has('read')) {
+      lines.push(
+        'When `read` targets an image file, it returns an English description of the visible content and any legible text.'
+      )
+    }
     if (toolNames.has('exec') && toolNames.has('read') && toolNames.has('edit')) {
       lines.push(
         'Recommended file task flow: `exec` for discovery/search -> `read` -> `edit`/`write`.'
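The `read` tool changes above make image reads degrade gracefully: resolve a vision-capable target for the conversation, ask that model to describe the image, and fall back to the metadata block whenever analysis is impossible. Below is a minimal standalone sketch of that control flow; `VisionTarget`, `resolveTarget`, and `describeImage` are illustrative stand-ins, not the repository's actual API.

```typescript
type VisionTarget = { providerId: string; modelId: string }

// Sketch of the fallback shape used by the image branch of `read`:
// metadata is always available; the vision description is best effort.
async function readImageAsText(
  metadata: string,
  resolveTarget: () => Promise<VisionTarget | null>,
  describeImage: (target: VisionTarget) => Promise<string>
): Promise<string> {
  const target = await resolveTarget()
  if (!target) {
    // Neither the session model nor the agent vision model can analyze images.
    return `${metadata}\n\nImage analysis unavailable, downgraded to metadata.`
  }

  try {
    const described = (await describeImage(target)).trim()
    return described || `${metadata}\n\nImage analysis returned no usable description.`
  } catch (error) {
    // Provider failures degrade to the metadata block instead of failing the read.
    const message = error instanceof Error ? error.message : String(error)
    return `${metadata}\n\nVision analysis failed, downgraded to metadata.\nerror: ${message}`
  }
}
```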
diff --git a/src/main/presenter/toolPresenter/runtimePorts.ts b/src/main/presenter/toolPresenter/runtimePorts.ts
index 899540811..806b436c2 100644
--- a/src/main/presenter/toolPresenter/runtimePorts.ts
+++ b/src/main/presenter/toolPresenter/runtimePorts.ts
@@ -6,8 +6,15 @@ import type {
 } from '@shared/presenter'
 import type { ISkillPresenter } from '@shared/types/skill'
 
+export interface ConversationSessionInfo {
+  agentId: string
+  providerId: string
+  modelId: string
+}
+
 export interface AgentToolRuntimePort {
   resolveConversationWorkdir(conversationId: string): Promise<string | null>
+  resolveConversationSessionInfo(conversationId: string): Promise<ConversationSessionInfo | null>
   getSkillPresenter(): ISkillPresenter
   getYoBrowserToolHandler(): IYoBrowserPresenter['toolHandler']
   getFilePresenter(): Pick
diff --git a/src/main/presenter/vision/sessionVisionResolver.ts b/src/main/presenter/vision/sessionVisionResolver.ts
new file mode 100644
index 000000000..3dd075b3e
--- /dev/null
+++ b/src/main/presenter/vision/sessionVisionResolver.ts
@@ -0,0 +1,91 @@
+import type { IConfigPresenter } from '@shared/presenter'
+
+export type SessionVisionTarget = {
+  providerId: string
+  modelId: string
+  source: 'session-model' | 'agent-vision-model'
+}
+
+type SessionVisionResolverParams = {
+  providerId?: string | null
+  modelId?: string | null
+  agentId?: string | null
+  signal?: AbortSignal
+  configPresenter: Pick<
+    IConfigPresenter,
+    'getModelConfig' | 'resolveDeepChatAgentConfig' | 'isKnownModel'
+  >
+  logLabel?: string
+}
+
+const createAbortError = (): Error => {
+  if (typeof DOMException !== 'undefined') {
+    return new DOMException('Aborted', 'AbortError')
+  }
+
+  const error = new Error('Aborted')
+  error.name = 'AbortError'
+  return error
+}
+
+const throwIfAbortRequested = (signal?: AbortSignal): void => {
+  if (signal?.aborted) {
+    throw createAbortError()
+  }
+}
+
+export async function resolveSessionVisionTarget(
+  params: SessionVisionResolverParams
+): Promise<SessionVisionTarget | null> {
+  throwIfAbortRequested(params.signal)
+  const sessionProviderId = params.providerId?.trim()
+  const sessionModelId = params.modelId?.trim()
+  const sessionModelConfig =
+    sessionProviderId && sessionModelId
+      ? params.configPresenter.getModelConfig(sessionModelId, sessionProviderId)
+      : null
+
+  if (
+    sessionProviderId &&
+    sessionModelId &&
+    params.configPresenter.isKnownModel?.(sessionProviderId, sessionModelId) === true &&
+    sessionModelConfig?.vision
+  ) {
+    return {
+      providerId: sessionProviderId,
+      modelId: sessionModelId,
+      source: 'session-model'
+    }
+  }
+
+  const agentId = params.agentId?.trim()
+  if (!agentId) {
+    return null
+  }
+
+  try {
+    throwIfAbortRequested(params.signal)
+    const agentConfig = await params.configPresenter.resolveDeepChatAgentConfig(agentId)
+    throwIfAbortRequested(params.signal)
+    const providerId = agentConfig.visionModel?.providerId?.trim()
+    const modelId = agentConfig.visionModel?.modelId?.trim()
+    if (providerId && modelId) {
+      return {
+        providerId,
+        modelId,
+        source: 'agent-vision-model'
+      }
+    }
+  } catch (error) {
+    if (error instanceof Error && error.name === 'AbortError') {
+      throw error
+    }
+    console.warn('[Vision] Failed to resolve agent vision model:', {
+      agentId,
+      context: params.logLabel ?? 'unknown',
+      error
+    })
+  }
+
+  return null
+}
diff --git a/src/renderer/settings/components/AcpSettings.vue b/src/renderer/settings/components/AcpSettings.vue
index b90851bb4..80a11e9fe 100644
--- a/src/renderer/settings/components/AcpSettings.vue
+++ b/src/renderer/settings/components/AcpSettings.vue
@@ -75,14 +75,9 @@
{{ t('settings.acp.installedSectionDescription') }}

-
- - {{ t('settings.acp.installedCount', { count: installedRegistryAgents.length }) }} - - -
+ + {{ t('settings.acp.installedCount', { count: installedRegistryAgents.length }) }} +
{{ t('settings.acp.installedEmptyDescription') }}

-
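Before the renderer changes below, one behavioral note on the configPresenter hunk earlier: the legacy `defaultVisionModel` setting is migrated into the builtin DeepChat agent exactly once. The value is copied only when it is well formed and the agent has no vision model yet, and the legacy key is deleted whenever anything was stored. A condensed sketch of that rule, with `migrateLegacyVisionModel` and its callback as hypothetical stand-ins for the store and agent config:

```typescript
type ModelSelection = { providerId: string; modelId: string }

const isModelSelection = (value: unknown): value is ModelSelection =>
  typeof value === 'object' &&
  value !== null &&
  typeof (value as ModelSelection).providerId === 'string' &&
  typeof (value as ModelSelection).modelId === 'string'

// Returns what happened; callers delete the legacy key in every case except 'skipped'.
function migrateLegacyVisionModel(
  legacy: unknown,
  builtinVisionModel: ModelSelection | null,
  apply: (selection: ModelSelection) => void
): 'skipped' | 'migrated' | 'cleared' {
  if (legacy === undefined) {
    return 'skipped' // nothing was ever stored
  }

  const builtinAlreadySet = Boolean(
    builtinVisionModel?.providerId && builtinVisionModel?.modelId
  )

  if (isModelSelection(legacy) && !builtinAlreadySet) {
    const providerId = legacy.providerId.trim()
    const modelId = legacy.modelId.trim()
    if (providerId && modelId) {
      apply({ providerId, modelId }) // copy into the builtin agent config
      return 'migrated'
    }
  }

  return 'cleared' // legacy value dropped without migration
}
```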
diff --git a/src/renderer/settings/components/common/DefaultModelSettingsSection.vue b/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
index cef7bd378..7a2ef692e 100644
--- a/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
+++ b/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
@@ -68,42 +68,6 @@
- -
- {{ - t('settings.common.defaultModel.visionModel') - }} -
- - - - - - - - -
-
@@ -119,7 +83,6 @@
 import ModelIcon from '@/components/icons/ModelIcon.vue'
 import { useThemeStore } from '@/stores/theme'
 import { useModelStore } from '@/stores/modelStore'
 import { usePresenter } from '@/composables/usePresenter'
-import { ModelType } from '@shared/model'
 import type { RENDERER_MODEL_META } from '@shared/presenter'
 
 const { t } = useI18n()
@@ -129,7 +92,6 @@
 const configPresenter = usePresenter('configPresenter')
 
 const assistantModelSelectOpen = ref(false)
 const chatModelSelectOpen = ref(false)
-const visionModelSelectOpen = ref(false)
 
 interface SelectedModel {
   providerId: string
@@ -138,7 +100,6 @@
 const selectedAssistantModel = ref<SelectedModel | null>(null)
 const selectedChatModel = ref<SelectedModel | null>(null)
-const selectedVisionModel = ref<SelectedModel | null>(null)
 
 let isSyncingModelDefaults = false
 
 const selectBySetting = (
@@ -164,7 +125,7 @@
 }
 
 const persistModelSetting = async (
-  key: 'assistantModel' | 'defaultModel' | 'defaultVisionModel',
+  key: 'assistantModel' | 'defaultModel',
   previous: { providerId: string; modelId: string } | undefined,
   current: SelectedModel | null
 ): Promise<void> => {
@@ -198,15 +159,6 @@
   chatModelSelectOpen.value = false
 }
 
-const handleVisionModelSelect = async (
-  model: RENDERER_MODEL_META,
-  providerId: string
-): Promise<void> => {
-  selectedVisionModel.value = { providerId, model }
-  await configPresenter.setSetting('defaultVisionModel', { providerId, modelId: model.id })
-  visionModelSelectOpen.value = false
-}
-
 const syncModelSelections = async (): Promise<void> => {
   if (isSyncingModelDefaults) {
     return
@@ -219,9 +171,6 @@
     const defaultModelSetting = (await configPresenter.getSetting('defaultModel')) as
       | { providerId: string; modelId: string }
      | undefined
-    const defaultVisionModelSetting = (await configPresenter.getSetting('defaultVisionModel')) as
-      | { providerId: string; modelId: string }
-      | undefined
 
     const chatSelection = selectBySetting(
       defaultModelSetting,
@@ -233,21 +182,11 @@
       (_model, providerId) => providerId !== 'acp'
     )
 
-    const visionSelection = selectBySetting(
-      defaultVisionModelSetting,
-      (model, providerId) =>
-        providerId !== 'acp' &&
-        Boolean(model.vision) &&
-        (model.type === ModelType.Chat || model.type === ModelType.ImageGeneration)
-    )
-
     selectedChatModel.value = chatSelection
     selectedAssistantModel.value = assistantSelection
-    selectedVisionModel.value = visionSelection
 
     await persistModelSetting('defaultModel', defaultModelSetting, chatSelection)
     await persistModelSetting('assistantModel', assistantModelSetting, assistantSelection)
-    await persistModelSetting('defaultVisionModel', defaultVisionModelSetting, visionSelection)
   } catch (error) {
     console.error('Failed to sync model selections:', error)
   } finally {
diff --git a/src/renderer/src/components/mcp-config/mcpServerForm.vue b/src/renderer/src/components/mcp-config/mcpServerForm.vue
index 8b1ca56bb..8c5d32923 100644
--- a/src/renderer/src/components/mcp-config/mcpServerForm.vue
+++ b/src/renderer/src/components/mcp-config/mcpServerForm.vue
@@ -18,16 +18,12 @@
 import { EmojiPicker } from '@/components/emoji-picker'
 import { useToast } from '@/components/use-toast'
 import { Icon } from '@iconify/vue'
 import { X } from 'lucide-vue-next'
-import ModelIcon from '@/components/icons/ModelIcon.vue'
-import { useModelStore } from '@/stores/modelStore'
 import { usePresenter } from '@/composables/usePresenter'
 import { nanoid } from 'nanoid'
 
 const { t } = useI18n()
 const { toast } = useToast()
-const modelStore = useModelStore()
 const devicePresenter = usePresenter('devicePresenter')
-const configPresenter = usePresenter('configPresenter')
 
 const props = defineProps<{
   serverName?: string
   initialConfig?: MCPServerConfig
@@ -57,14 +53,8 @@
 const customHeadersFocused = ref(false)
 const customHeadersDisplayValue = ref('')
 const npmRegistry = ref(props.initialConfig?.customNpmRegistry || '')
 
-// imageServer display only (read-only, sourced from defaultVisionModel)
-const selectedImageModelName = ref('')
-const selectedImageModelProvider = ref('')
-
 // Determine whether this is an inmemory type
 const isInMemoryType = computed(() => type.value === 'inmemory')
-// Determine whether this is the imageServer
-const isImageServer = computed(() => isInMemoryType.value && name.value === 'imageServer')
 // Determine whether this is buildInFileSystem
 const isBuildInFileSystem = computed(
   () => isInMemoryType.value && name.value === 'buildInFileSystem'
@@ -80,32 +70,6 @@ const formatJsonHeaders = (headers: Record<string, string>): string => {
     .map(([key, value]) => `${key}=${value}`)
     .join('\n')
 }
-const refreshImageServerDefaultModelDisplay = async (): Promise<void> => {
-  if (!isImageServer.value) {
-    selectedImageModelName.value = ''
-    selectedImageModelProvider.value = ''
-    return
-  }
-
-  const defaultVisionModel = (await configPresenter.getSetting('defaultVisionModel')) as
-    | { providerId: string; modelId: string }
-    | undefined
-  if (!defaultVisionModel?.providerId || !defaultVisionModel?.modelId) {
-    selectedImageModelName.value = ''
-    selectedImageModelProvider.value = ''
-    return
-  }
-
-  selectedImageModelProvider.value = defaultVisionModel.providerId
-  const providerEntry = modelStore.enabledModels.find(
-    (entry) => entry.providerId === defaultVisionModel.providerId
-  )
-  const resolvedModel = providerEntry?.models.find(
-    (model) => model.id === defaultVisionModel.modelId
-  )
-  selectedImageModelName.value =
-    resolvedModel?.name || `${defaultVisionModel.providerId}/${defaultVisionModel.modelId}`
-}
 
 // Get the localized name and description for built-in servers
 const getLocalizedName = computed(() => {
@@ -144,11 +108,9 @@
 const jsonConfig = ref('')
 
 const showBaseUrl = computed(() => isRemoteType.value)
 // Computed property that controls whether the command-related fields are shown
 const showCommandFields = computed(() => type.value === 'stdio')
-// Controls whether the args input is shown (stdio, or inmemory that is not imageServer and not buildInFileSystem)
+// Controls whether the args input is shown (stdio, or inmemory that is not buildInFileSystem)
 const showArgsInput = computed(
-  () =>
-    showCommandFields.value ||
-    (isInMemoryType.value && !isImageServer.value && !isBuildInFileSystem.value)
+  () => showCommandFields.value || (isInMemoryType.value && !isBuildInFileSystem.value)
 )
 
 // Controls whether the folder picker UI is shown (buildInFileSystem only)
@@ -253,11 +215,11 @@
 const isNameValid = computed(() => name.value.trim().length > 0)
 const isCommandValid = computed(() => {
   // For SSE types the command is not required
   if (isRemoteType.value) return true
-  // For STDIO or inmemory types the command is required (excluding built-in servers)
-  if (type.value === 'stdio' || (isInMemoryType.value && !isImageServer.value)) {
+  // For STDIO or inmemory types the command is required
+  if (type.value === 'stdio' || isInMemoryType.value) {
     return command.value.trim().length > 0
   }
-  return true // Other cases (such as imageServer) are valid by default
+  return true
 })
 const isEnvValid = computed(() => {
   try {
@@ -473,11 +435,9 @@ const handleSubmit = (): void => {
     }
   } else {
     // STDIO or inmemory server types
-    const normalizedArgs = isImageServer.value
-      ? []
-      : isBuildInFileSystem.value
-        ? foldersList.value.filter((folder) => folder.trim().length > 0)
-        : argsRows.value.map((row) => row.value.trim()).filter((value) => value.length > 0)
+    const normalizedArgs = isBuildInFileSystem.value
+      ? foldersList.value.filter((folder) => folder.trim().length > 0)
+      : argsRows.value.map((row) => row.value.trim()).filter((value) => value.length > 0)
     serverConfig = {
       ...baseConfig,
       command: command.value.trim(),
@@ -592,15 +552,6 @@
   { immediate: true }
 )
 
-// imageServer only displays the default vision model; it is no longer configured via args
-watch(
-  [() => name.value, () => type.value, () => modelStore.enabledModels],
-  () => {
-    void refreshImageServerDefaultModelDisplay()
-  },
-  { immediate: true, deep: true }
-)
-
 // Watch for initial config changes (primarily for edit mode)
 watch(
   () => props.initialConfig,
@@ -793,25 +744,6 @@
HTTP-Referer=deepchatai.cn`
/>
- -
- -
- - {{ - selectedImageModelName || t('settings.mcp.serverForm.imageModel') - }} -
-
-
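A closing note on the `generateCompletionStandalone` change: the provider call cannot itself be cancelled, so an optional `AbortSignal` is raced against it, and the abort listener is detached once the call settles so a long-lived signal does not accumulate handlers. A generic sketch of the same pattern follows; `raceWithAbort` is an illustrative helper name, not part of the codebase.

```typescript
const createAbortError = (): Error => {
  const error = new Error('Aborted')
  error.name = 'AbortError'
  return error
}

// Race a non-cancellable promise against an AbortSignal.
async function raceWithAbort<T>(work: Promise<T>, signal?: AbortSignal): Promise<T> {
  if (!signal) return work
  if (signal.aborted) throw createAbortError()

  let onAbort: (() => void) | undefined
  const abortPromise = new Promise<never>((_, reject) => {
    onAbort = () => reject(createAbortError())
    signal.addEventListener('abort', onAbort, { once: true })
  })

  try {
    // Whichever settles first wins; an abort rejects with an AbortError.
    return await Promise.race([work, abortPromise])
  } finally {
    // Detach the listener so the signal does not leak handlers.
    if (onAbort) signal.removeEventListener('abort', onAbort)
  }
}
```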