diff --git a/archives/code/dead-code-batch-3/README.md b/archives/code/dead-code-batch-3/README.md
new file mode 100644
index 000000000..bea975789
--- /dev/null
+++ b/archives/code/dead-code-batch-3/README.md
@@ -0,0 +1,14 @@
+# Dead Code Batch 3
+
+- Purpose: archive retired MCP runtime code that is no longer part of the active in-memory server set.
+- Archived at: 2026-03-26
+- Rationale: `meetingServer.ts` has been removed from live MCP registration and the default config, but its source is retained here so it can be restored exactly if the feature is rebuilt later.
+
+## Archived Paths
+
+- `src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts`
+
+## Notes
+
+- This directory is not part of the runtime, build, typecheck, or test target set.
+- Restore by moving files back to their original paths only if a future audit proves the retired MCP server is needed again.
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts b/archives/code/dead-code-batch-3/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
similarity index 100%
rename from src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
rename to archives/code/dead-code-batch-3/src/main/presenter/mcpPresenter/inMemoryServers/meetingServer.ts
diff --git a/scripts/generate-i18n-types.js b/scripts/generate-i18n-types.js
index 9e24b7c3a..5c212e7c7 100644
--- a/scripts/generate-i18n-types.js
+++ b/scripts/generate-i18n-types.js
@@ -1,6 +1,6 @@
import fs from 'fs'
import path from 'path'
-import { fileURLToPath } from 'url'
+import { fileURLToPath, pathToFileURL } from 'url'
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
@@ -57,6 +57,6 @@ async function main() {
}
// Only needs to be run during local development
-if (import.meta.url === `file://${process.argv[1]}`) {
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
main()
}
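
For context on this fix: on Windows, `process.argv[1]` is a backslashed path with a drive letter (e.g. `C:\repo\scripts\generate-i18n-types.js`), so the hand-built `file://${process.argv[1]}` string never equals `import.meta.url` and the script silently does nothing when run directly. `pathToFileURL` produces the canonical `file:///C:/...` form on every platform. A minimal sketch of the pattern, reusable in any ESM entry script:

```ts
import { pathToFileURL } from 'url'

// True only when this module is the process entry point. The argv guard
// covers embedders (e.g. REPLs) where process.argv[1] is undefined, and
// pathToFileURL normalizes separators, drive letters, and percent-encoding.
const isDirectRun =
  Boolean(process.argv[1]) && import.meta.url === pathToFileURL(process.argv[1]).href

if (isDirectRun) {
  console.log('running as a CLI entry point')
}
```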
diff --git a/src/main/events.ts b/src/main/events.ts
index 38c6ac140..77da1c471 100644
--- a/src/main/events.ts
+++ b/src/main/events.ts
@@ -219,11 +219,6 @@ export const TRAY_EVENTS = {
CHECK_FOR_UPDATES: 'tray:check-for-updates' // Check for updates from the tray
}
-// MCP meeting-specific events
-export const MEETING_EVENTS = {
- INSTRUCTION: 'mcp:meeting-instruction' // Main process sends instructions to the renderer process
-}
-
// Floating button events
export const FLOATING_BUTTON_EVENTS = {
CLICKED: 'floating-button:clicked', // Floating button clicked
diff --git a/src/main/presenter/configPresenter/index.ts b/src/main/presenter/configPresenter/index.ts
index 12a9a7fb9..353f3ac99 100644
--- a/src/main/presenter/configPresenter/index.ts
+++ b/src/main/presenter/configPresenter/index.ts
@@ -102,7 +102,7 @@ interface IAppSettings {
enableSkills?: boolean // Skills system global toggle
hooksNotifications?: HooksNotificationsSettings // Hooks & notifications settings
defaultModel?: { providerId: string; modelId: string } // Default model for new conversations
- defaultVisionModel?: { providerId: string; modelId: string } // Default vision model for image tools
+ defaultVisionModel?: { providerId: string; modelId: string } // Legacy vision model setting for migration only
defaultProjectPath?: string | null
acpRegistryMigrationVersion?: number
unifiedAgentsMigrationVersion?: number
@@ -153,6 +153,15 @@ const isModelSelection = (value: unknown): value is ModelSelection => {
return typeof record.providerId === 'string' && typeof record.modelId === 'string'
}
+const normalizeKnownModelId = (modelId: string): string => {
+ const normalizedModelId = modelId.trim().toLowerCase()
+ return normalizedModelId.replace(/^models\//, '')
+}
+
+const normalizeKnownProviderId = (providerId: string): string =>
+ modelCapabilities.resolveProviderId(providerId.trim().toLowerCase()) ||
+ providerId.trim().toLowerCase()
+
export const getAnthropicModelSelectionKeysToClear = (
settings: Partial<
Record<
@@ -362,6 +371,7 @@ export class ConfigPresenter implements IConfigPresenter {
setAgentRepository(agentRepository: AgentRepository): void {
this.agentRepository = agentRepository
this.initializeUnifiedAgents()
+ this.migrateLegacyDefaultVisionModelToBuiltinAgent()
}
private getAgentRepositoryOrThrow(): AgentRepository {
@@ -396,6 +406,35 @@ export class ConfigPresenter implements IConfigPresenter {
this.syncRegistryAgentsToRepository()
}
+ private migrateLegacyDefaultVisionModelToBuiltinAgent(): void {
+ const legacySelection = this.store.get('defaultVisionModel') as unknown
+ if (legacySelection === undefined) {
+ return
+ }
+
+ const builtinVisionModel = this.getBuiltinDeepChatConfig().visionModel
+
+ if (
+ isModelSelection(legacySelection) &&
+ (!builtinVisionModel?.providerId || !builtinVisionModel?.modelId)
+ ) {
+ const providerId = legacySelection.providerId.trim()
+ const modelId = legacySelection.modelId.trim()
+
+ if (providerId && modelId) {
+ this.updateBuiltinDeepChatConfig({
+ visionModel: {
+ providerId,
+ modelId
+ }
+ })
+ }
+ }
+
+ this.store.delete('defaultVisionModel')
+ eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultVisionModel', undefined)
+ }
+
private buildLegacyBuiltinDeepChatConfig(): DeepChatAgentConfig {
const defaultModel = this.store.get('defaultModel') as ModelSelection | undefined
const assistantModel = this.store.get('assistantModel') as ModelSelection | undefined
@@ -760,7 +799,9 @@ export class ConfigPresenter implements IConfigPresenter {
const keysToClear = getAnthropicModelSelectionKeysToClear({
defaultModel: this.getSetting('defaultModel'),
assistantModel: this.getSetting('assistantModel'),
- defaultVisionModel: this.getSetting('defaultVisionModel'),
+ defaultVisionModel: this.store.get('defaultVisionModel') as
+ | { providerId: string; modelId: string }
+ | undefined,
preferredModel: this.getSetting('preferredModel')
})
@@ -780,9 +821,6 @@ export class ConfigPresenter implements IConfigPresenter {
if (key === 'assistantModel') {
return this.getBuiltinDeepChatConfig().assistantModel as T | undefined
}
- if (key === 'defaultVisionModel') {
- return this.getDefaultVisionModel() as T | undefined
- }
if (key === 'default_system_prompt') {
return this.getBuiltinDeepChatConfig().systemPrompt as T | undefined
}
@@ -808,10 +846,6 @@ export class ConfigPresenter implements IConfigPresenter {
eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, key, value)
return
}
- if (key === 'defaultVisionModel') {
- this.setDefaultVisionModel(value as { providerId: string; modelId: string } | undefined)
- return
- }
if (key === 'default_system_prompt') {
this.updateBuiltinDeepChatConfig({
systemPrompt: typeof value === 'string' ? value : ''
@@ -1015,6 +1049,26 @@ export class ConfigPresenter implements IConfigPresenter {
return this.providerModelHelper.getCustomModels(providerId)
}
+ isKnownModel(providerId: string, modelId: string): boolean {
+ const normalizedProviderId = normalizeKnownProviderId(providerId)
+ const normalizedModelId = normalizeKnownModelId(modelId)
+
+ if (!normalizedProviderId || !normalizedModelId) {
+ return false
+ }
+
+ const hasKnownModel = (models: Array<{ id: string }> | undefined): boolean =>
+ Array.isArray(models) &&
+ models.some((model) => normalizeKnownModelId(model.id) === normalizedModelId)
+
+ return (
+ this.hasUserModelConfig(normalizedModelId, normalizedProviderId) ||
+ hasKnownModel(this.getProviderModels(normalizedProviderId)) ||
+ hasKnownModel(this.getCustomModels(normalizedProviderId)) ||
+ hasKnownModel(this.getDbProviderModels(normalizedProviderId))
+ )
+ }
+
setCustomModels(providerId: string, models: MODEL_META[]): void {
this.providerModelHelper.setCustomModels(providerId, models)
}
@@ -1688,6 +1742,18 @@ export class ConfigPresenter implements IConfigPresenter {
)
}
+ async agentSupportsCapability(agentId: string, capability: 'vision'): Promise<boolean> {
+ if (capability !== 'vision') {
+ return false
+ }
+
+ const agentConfig = await this.resolveDeepChatAgentConfig(agentId)
+ const providerId = agentConfig.visionModel?.providerId?.trim()
+ const modelId = agentConfig.visionModel?.modelId?.trim()
+
+ return Boolean(providerId && modelId && this.getModelConfig(modelId, providerId)?.vision)
+ }
+
async createDeepChatAgent(input: CreateDeepChatAgentInput): Promise {
const created = this.getAgentRepositoryOrThrow().createDeepChatAgent(input)
this.notifyAcpAgentsChanged()
@@ -2312,32 +2378,6 @@ export class ConfigPresenter implements IConfigPresenter {
eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultModel', model)
}
- getDefaultVisionModel(): { providerId: string; modelId: string } | undefined {
- const selection = this.getBuiltinDeepChatConfig().visionModel
- if (selection?.providerId && selection?.modelId) {
- return {
- providerId: selection.providerId,
- modelId: selection.modelId
- }
- }
- return this.store.get('defaultVisionModel') as
- | { providerId: string; modelId: string }
- | undefined
- }
-
- setDefaultVisionModel(model: { providerId: string; modelId: string } | undefined): void {
- this.updateBuiltinDeepChatConfig({
- visionModel:
- model?.providerId && model?.modelId
- ? {
- providerId: model.providerId,
- modelId: model.modelId
- }
- : null
- })
- eventBus.sendToMain(CONFIG_EVENTS.SETTING_CHANGED, 'defaultVisionModel', model)
- }
-
getDefaultProjectPath(): string | null {
const path = this.getSetting('defaultProjectPath')
return path?.trim() ? path.trim() : null
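
For reference, the new `isKnownModel` check tolerates cosmetic identifier differences: both ids are trimmed and lowercased, a Gemini-style `models/` prefix is stripped, and provider aliases go through `modelCapabilities.resolveProviderId` before the user, provider, custom, and DB model lists are consulted. A standalone sketch of the normalization (the alias table is a hypothetical stand-in for `modelCapabilities`):

```ts
// Hypothetical stand-in for modelCapabilities.resolveProviderId.
const PROVIDER_ALIASES: Record<string, string> = { 'google-genai': 'gemini' }

const normalizeKnownModelId = (modelId: string): string =>
  modelId.trim().toLowerCase().replace(/^models\//, '')

const normalizeKnownProviderId = (providerId: string): string => {
  const lowered = providerId.trim().toLowerCase()
  return PROVIDER_ALIASES[lowered] ?? lowered
}

// Both pairs normalize to ('gemini', 'gemini-2.0-flash'):
console.log(normalizeKnownProviderId(' Google-GenAI '), normalizeKnownModelId('models/Gemini-2.0-Flash'))
console.log(normalizeKnownProviderId('gemini'), normalizeKnownModelId('gemini-2.0-flash'))
```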
diff --git a/src/main/presenter/configPresenter/mcpConfHelper.ts b/src/main/presenter/configPresenter/mcpConfHelper.ts
index b3c5f0a5b..4fd1a2543 100644
--- a/src/main/presenter/configPresenter/mcpConfHelper.ts
+++ b/src/main/presenter/configPresenter/mcpConfHelper.ts
@@ -165,16 +165,6 @@ const DEFAULT_INMEMORY_SERVERS: Record>
},
disable: false
},
- imageServer: {
- args: [],
- descriptions: 'Image processing MCP service',
- icons: '🖼️',
- autoApprove: ['read_image_base64', 'read_multiple_images_base64'], // Auto-approve reading, require confirmation for uploads
- type: 'inmemory' as MCPServerType,
- command: 'image', // We need to map this command to the ImageServer class later
- env: {},
- disable: false
- },
ragflowKnowledge: {
args: [],
descriptions: 'DeepChat built-in RAGFlow knowledge base retrieval service',
@@ -258,16 +248,6 @@ const DEFAULT_INMEMORY_SERVERS: Record>
env: {},
disable: false
},
- 'deepchat-inmemory/meeting-server': {
- args: [],
- descriptions: 'DeepChat built-in meeting service for orchestrating multi-agent discussions',
- icons: '👥',
- autoApprove: ['all'],
- type: 'inmemory' as MCPServerType,
- command: 'deepchat-inmemory/meeting-server',
- env: {},
- disable: false
- },
// Merge platform-specific services
...PLATFORM_SPECIFIC_SERVERS
}
@@ -384,15 +364,35 @@ export class McpConfHelper {
private removeDeprecatedBuiltInServers(
servers: Record<string, MCPServerConfig>
): Record<string, MCPServerConfig> {
- const deprecatedBuiltInServers = ['powerpack']
+ const deprecatedBuiltInServers = [
+ 'powerpack',
+ 'deepchat-inmemory/meeting-server',
+ 'imageServer'
+ ]
+ let hasChanges = false
+ const removedBuiltInServers = new Set(this.getRemovedBuiltInServers())
+ let removedListChanged = false
for (const serverName of deprecatedBuiltInServers) {
if (servers[serverName]) {
console.log(`Removing deprecated built-in MCP service: ${serverName}`)
delete servers[serverName]
+ hasChanges = true
+ }
+
+ if (removedBuiltInServers.delete(serverName)) {
+ removedListChanged = true
}
}
+ if (hasChanges) {
+ this.mcpStore.set('mcpServers', servers)
+ }
+
+ if (removedListChanged) {
+ this.setRemovedBuiltInServers(Array.from(removedBuiltInServers))
+ }
+
return servers
}
@@ -913,15 +913,9 @@ export class McpConfHelper {
}
try {
- const mcpServers = this.mcpStore.get('mcpServers') || {}
-
- if (mcpServers.powerpack) {
- console.log('Removing deprecated powerpack MCP server')
- delete mcpServers.powerpack
- this.mcpStore.set('mcpServers', mcpServers)
- }
+ this.removeDeprecatedBuiltInServers(this.mcpStore.get('mcpServers') || {})
} catch (error) {
- console.error('Error occurred while removing deprecated powerpack server:', error)
+ console.error('Error occurred while removing deprecated built-in MCP servers:', error)
}
// After upgrading, check for and add platform-specific services
diff --git a/src/main/presenter/deepchatAgentPresenter/dispatch.ts b/src/main/presenter/deepchatAgentPresenter/dispatch.ts
index 6874379db..37fdd809e 100644
--- a/src/main/presenter/deepchatAgentPresenter/dispatch.ts
+++ b/src/main/presenter/deepchatAgentPresenter/dispatch.ts
@@ -679,6 +679,20 @@ export async function executeTools(
}
}
+ if (hooks?.normalizeToolResult) {
+ toolRawData = {
+ ...toolRawData,
+ content: await hooks.normalizeToolResult({
+ sessionId: io.sessionId,
+ toolCallId: tc.id,
+ toolName: tc.name,
+ toolArgs: tc.arguments,
+ content: toolRawData.content,
+ isError: toolRawData.isError === true
+ })
+ }
+ }
+
const searchPayload = extractSearchPayload(
toolRawData.content,
toolContext.name,
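
The hook is optional, so `executeTools` only rewrites `toolRawData.content` when the presenter supplies one. A minimal conforming hook, sketched to match the `normalizeToolResult` signature added to `ProcessHooks` later in this diff:

```ts
import type { MCPToolResponse } from '@shared/types/core/mcp'

const normalizeToolResult = async (tool: {
  sessionId: string
  toolCallId: string
  toolName: string
  toolArgs: string
  content: MCPToolResponse['content']
  isError: boolean
}): Promise<MCPToolResponse['content']> => {
  // Error results pass through untouched; only successful output is rewritten.
  if (tool.isError) return tool.content
  // A real hook would post-process here, e.g. replace a screenshot with text.
  return tool.content
}
```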
diff --git a/src/main/presenter/deepchatAgentPresenter/index.ts b/src/main/presenter/deepchatAgentPresenter/index.ts
index 6971a592e..eaf527c24 100644
--- a/src/main/presenter/deepchatAgentPresenter/index.ts
+++ b/src/main/presenter/deepchatAgentPresenter/index.ts
@@ -52,6 +52,7 @@ import { ToolOutputGuard } from './toolOutputGuard'
import type { ProviderRequestTracePayload } from '../llmProviderPresenter/requestTrace'
import type { NewSessionHooksBridge } from '../hooksNotifications/newSessionBridge'
import { providerDbLoader } from '../configPresenter/providerDbLoader'
+import { resolveSessionVisionTarget } from '../vision/sessionVisionResolver'
type PendingInteractionEntry = {
interaction: PendingToolInteraction
@@ -107,6 +108,16 @@ const isReasoningEffort = (value: unknown): value is 'minimal' | 'low' | 'medium
const isVerbosity = (value: unknown): value is 'low' | 'medium' | 'high' =>
value === 'low' || value === 'medium' || value === 'high'
+const createAbortError = (): Error => {
+ if (typeof DOMException !== 'undefined') {
+ return new DOMException('Aborted', 'AbortError')
+ }
+
+ const error = new Error('Aborted')
+ error.name = 'AbortError'
+ return error
+}
+
export class DeepChatAgentPresenter implements IAgentImplementation {
private readonly llmProviderPresenter: ILlmProviderPresenter
private readonly configPresenter: IConfigPresenter
@@ -1011,6 +1022,23 @@ export class DeepChatAgentPresenter implements IAgentImplementation {
return undefined
}
+ private getAbortSignalForSession(sessionId: string): AbortSignal | undefined {
+ return (
+ this.activeGenerations.get(sessionId)?.abortController.signal ??
+ this.abortControllers.get(sessionId)?.signal
+ )
+ }
+
+ private throwIfAbortRequested(signal?: AbortSignal): void {
+ if (signal?.aborted) {
+ throw createAbortError()
+ }
+ }
+
+ private isAbortError(error: unknown): boolean {
+ return error instanceof Error && (error.name === 'AbortError' || error.name === 'CanceledError')
+ }
+
private dispatchResolvedToolHook(params: {
sessionId: string
messageId: string
@@ -1424,7 +1452,17 @@ export class DeepChatAgentPresenter implements IAgentImplementation {
body: gap
}
})
- }
+ },
+ normalizeToolResult: async (tool) =>
+ await this.normalizeToolResultContent({
+ sessionId: tool.sessionId,
+ toolCallId: tool.toolCallId,
+ toolName: tool.toolName,
+ toolArgs: tool.toolArgs,
+ content: tool.content,
+ isError: tool.isError,
+ abortSignal: abortController.signal
+ })
},
io: {
sessionId,
@@ -2867,7 +2905,16 @@ export class DeepChatAgentPresenter implements IAgentImplementation {
permissionRequest: rawData.permissionRequest as PendingToolInteraction['permission']
}
}
- const responseText = this.toolContentToText(rawData.content)
+ const normalizedContent = await this.normalizeToolResultContent({
+ sessionId,
+ toolCallId: toolCall.id || '',
+ toolName,
+ toolArgs: toolCall.params || '{}',
+ content: rawData.content,
+ isError: rawData.isError === true,
+ abortSignal: this.getAbortSignalForSession(sessionId)
+ })
+ const responseText = this.toolContentToText(normalizedContent)
const prepared = await this.toolOutputGuard.prepareToolOutput({
sessionId,
toolCallId: toolCall.id || '',
@@ -2956,6 +3003,199 @@ export class DeepChatAgentPresenter implements IAgentImplementation {
})
}
+ private async normalizeToolResultContent(params: {
+ sessionId: string
+ toolCallId: string
+ toolName: string
+ toolArgs: string
+ content: MCPToolResponse['content']
+ isError: boolean
+ abortSignal?: AbortSignal
+ }): Promise<MCPToolResponse['content']> {
+ if (params.isError) {
+ return params.content
+ }
+
+ const abortSignal = params.abortSignal ?? this.getAbortSignalForSession(params.sessionId)
+ const screenshotPayload = this.extractScreenshotToolPayload(
+ params.toolName,
+ params.toolArgs,
+ params.content
+ )
+ if (!screenshotPayload) {
+ return params.content
+ }
+
+ try {
+ this.throwIfAbortRequested(abortSignal)
+ const visionModel = await this.resolveScreenshotVisionModel(params.sessionId, abortSignal)
+ this.throwIfAbortRequested(abortSignal)
+
+ if (!visionModel) {
+ return 'Screenshot captured, but automatic English analysis is unavailable because neither the current session model nor the agent vision model can analyze images.'
+ }
+
+ const messages: ChatMessage[] = [
+ {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+ text: this.buildScreenshotAnalysisPrompt()
+ },
+ {
+ type: 'image_url',
+ image_url: {
+ url: screenshotPayload.dataUrl,
+ detail: 'auto'
+ }
+ }
+ ]
+ }
+ ]
+
+ const modelConfig = this.configPresenter.getModelConfig(
+ visionModel.modelId,
+ visionModel.providerId
+ )
+ const response = await this.llmProviderPresenter.generateCompletionStandalone(
+ visionModel.providerId,
+ messages,
+ visionModel.modelId,
+ modelConfig?.temperature ?? 0.2,
+ Math.min(modelConfig?.maxTokens ?? 900, 900),
+ abortSignal ? { signal: abortSignal } : undefined
+ )
+ this.throwIfAbortRequested(abortSignal)
+ const normalized = response.trim()
+ if (!normalized) {
+ return 'Screenshot captured, but automatic English analysis returned no usable description.'
+ }
+ return normalized
+ } catch (error) {
+ if (this.isAbortError(error)) {
+ return 'Screenshot captured, but automatic English analysis was canceled.'
+ }
+
+ const message = error instanceof Error ? error.message : String(error)
+ console.warn('[DeepChatAgent] Failed to normalize screenshot tool output:', {
+ sessionId: params.sessionId,
+ toolCallId: params.toolCallId,
+ error: message
+ })
+ return `Screenshot captured, but automatic English analysis failed: ${message}`
+ }
+ }
+
+ private extractScreenshotToolPayload(
+ toolName: string,
+ toolArgs: string,
+ content: MCPToolResponse['content']
+ ): { dataUrl: string } | null {
+ if (toolName !== 'cdp_send' || typeof content !== 'string') {
+ return null
+ }
+
+ const parsedArgs = this.parseJsonRecord(toolArgs)
+ if (!parsedArgs || parsedArgs.method !== 'Page.captureScreenshot') {
+ return null
+ }
+
+ const parsedContent = this.parseJsonRecord(content)
+ const rawData = typeof parsedContent?.data === 'string' ? parsedContent.data.trim() : ''
+ if (!rawData) {
+ return null
+ }
+
+ const screenshotParams = this.normalizeJsonRecord(parsedArgs.params)
+ const mimeType = this.resolveScreenshotMimeType(screenshotParams?.format)
+ const dataUrl = rawData.startsWith('data:image/')
+ ? rawData
+ : `data:${mimeType};base64,${rawData}`
+
+ return { dataUrl }
+ }
+
+ private normalizeJsonRecord(value: unknown): Record<string, unknown> | null {
+ if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
+ return value as Record<string, unknown>
+ }
+
+ if (typeof value !== 'string' || !value.trim()) {
+ return null
+ }
+
+ return this.parseJsonRecord(value)
+ }
+
+ private parseJsonRecord(value: string): Record<string, unknown> | null {
+ try {
+ const parsed = JSON.parse(value) as unknown
+ if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+ return parsed as Record<string, unknown>
+ }
+ } catch {}
+
+ return null
+ }
+
+ private resolveScreenshotMimeType(format: unknown): string {
+ if (format === 'jpeg') {
+ return 'image/jpeg'
+ }
+ if (format === 'webp') {
+ return 'image/webp'
+ }
+ return 'image/png'
+ }
+
+ private async resolveScreenshotVisionModel(
+ sessionId: string,
+ abortSignal?: AbortSignal
+ ): Promise<{ providerId: string; modelId: string } | null> {
+ this.throwIfAbortRequested(abortSignal)
+ const state = this.runtimeState.get(sessionId)
+ const dbSession = this.sessionStore.get(sessionId)
+ const agentId = this.getSessionAgentId(sessionId) ?? 'deepchat'
+ const resolved = await resolveSessionVisionTarget({
+ providerId: state?.providerId ?? dbSession?.provider_id,
+ modelId: state?.modelId ?? dbSession?.model_id,
+ agentId,
+ configPresenter: this.configPresenter,
+ signal: abortSignal,
+ logLabel: `screenshot:${sessionId}`
+ })
+ this.throwIfAbortRequested(abortSignal)
+
+ if (!resolved) {
+ return null
+ }
+
+ if (resolved.source === 'agent-vision-model') {
+ const agentSupportsVision =
+ (await this.configPresenter.agentSupportsCapability?.(agentId, 'vision')) === true
+ this.throwIfAbortRequested(abortSignal)
+ if (!agentSupportsVision) {
+ return null
+ }
+ }
+
+ return {
+ providerId: resolved.providerId,
+ modelId: resolved.modelId
+ }
+ }
+
+ private buildScreenshotAnalysisPrompt(): string {
+ return [
+ 'Analyze this browser screenshot and respond in English only.',
+ 'Describe only what is clearly visible.',
+ 'Include the page type or layout, the most important visible text, interactive controls, status indicators, warnings, errors, and any detail that matters for the next browser action.',
+ 'Do not speculate about hidden or unreadable content.',
+ 'Return detailed plain text in a single paragraph.'
+ ].join('\n')
+ }
+
private toolContentToText(content: MCPToolResponse['content']): string {
if (typeof content === 'string') {
return content
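
For orientation, the screenshot path only triggers for `cdp_send` calls whose `method` is `Page.captureScreenshot`; the result's base64 `data` field is wrapped into a data URL whose mime type follows the request's `format` param, defaulting to PNG. A standalone sketch of that assembly:

```ts
type ScreenshotArgs = { method: string; params?: { format?: 'png' | 'jpeg' | 'webp' } }

const mimeFor = (format?: string): string =>
  format === 'jpeg' ? 'image/jpeg' : format === 'webp' ? 'image/webp' : 'image/png'

// Mirrors extractScreenshotToolPayload: only Page.captureScreenshot results
// with a non-empty base64 `data` field become data URLs.
function toDataUrl(args: ScreenshotArgs, resultJson: string): string | null {
  if (args.method !== 'Page.captureScreenshot') return null
  let data = ''
  try {
    const parsed = JSON.parse(resultJson) as { data?: unknown }
    data = typeof parsed.data === 'string' ? parsed.data.trim() : ''
  } catch {
    return null
  }
  if (!data) return null
  return data.startsWith('data:image/') ? data : `data:${mimeFor(args.params?.format)};base64,${data}`
}
```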
diff --git a/src/main/presenter/deepchatAgentPresenter/types.ts b/src/main/presenter/deepchatAgentPresenter/types.ts
index 785ae76e1..df53d0936 100644
--- a/src/main/presenter/deepchatAgentPresenter/types.ts
+++ b/src/main/presenter/deepchatAgentPresenter/types.ts
@@ -6,7 +6,7 @@ import type {
} from '@shared/types/agent-interface'
import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events'
import type { ChatMessage } from '@shared/types/core/chat-message'
-import type { MCPToolDefinition } from '@shared/types/core/mcp'
+import type { MCPToolDefinition, MCPToolResponse } from '@shared/types/core/mcp'
import type { ModelConfig } from '@shared/presenter'
import type { IToolPresenter } from '@shared/types/presenters/tool.presenter'
import type { DeepChatMessageStore } from './messageStore'
@@ -76,6 +76,14 @@ export interface ProcessHooks {
reasoningContentLength: number
toolCallCount: number
}) => void
+ normalizeToolResult?: (tool: {
+ sessionId: string
+ toolCallId: string
+ toolName: string
+ toolArgs: string
+ content: MCPToolResponse['content']
+ isError: boolean
+ }) => Promise<MCPToolResponse['content']>
}
export interface PendingToolInteraction {
diff --git a/src/main/presenter/index.ts b/src/main/presenter/index.ts
index 6d513c108..185eb9ccb 100644
--- a/src/main/presenter/index.ts
+++ b/src/main/presenter/index.ts
@@ -259,6 +259,18 @@ export class Presenter implements IPresenter {
return null
},
+ resolveConversationSessionInfo: async (conversationId) => {
+ const session = await this.newAgentPresenter?.getSession(conversationId)
+ if (!session) {
+ return null
+ }
+
+ return {
+ agentId: session.agentId,
+ providerId: session.providerId,
+ modelId: session.modelId
+ }
+ },
getSkillPresenter: () => this.skillPresenter,
getYoBrowserToolHandler: () => this.yoBrowserPresenter.toolHandler,
getFilePresenter: () => ({
diff --git a/src/main/presenter/llmProviderPresenter/index.ts b/src/main/presenter/llmProviderPresenter/index.ts
index d5caa1eaf..8cd0ca95c 100644
--- a/src/main/presenter/llmProviderPresenter/index.ts
+++ b/src/main/presenter/llmProviderPresenter/index.ts
@@ -33,6 +33,16 @@ import { AcpSessionPersistence } from './acp'
import { AcpProvider } from './providers/acpProvider'
import type { ProviderMcpRuntimePort } from './runtimePorts'
+const createAbortError = (): Error => {
+ if (typeof DOMException !== 'undefined') {
+ return new DOMException('Aborted', 'AbortError')
+ }
+
+ const error = new Error('Aborted')
+ error.name = 'AbortError'
+ return error
+}
+
export class LLMProviderPresenter implements ILlmProviderPresenter {
private currentProviderId: string | null = null
private readonly activeStreams: Map = new Map()
@@ -258,16 +268,37 @@ export class LLMProviderPresenter implements ILlmProviderPresenter {
messages: ChatMessage[],
modelId: string,
temperature?: number,
- maxTokens?: number
+ maxTokens?: number,
+ options?: { signal?: AbortSignal }
): Promise<string> {
const provider = this.getProviderInstance(providerId)
let response = ''
+ const signal = options?.signal
+
+ if (signal?.aborted) {
+ throw createAbortError()
+ }
+
+ const completionPromise = provider.completions(messages, modelId, temperature, maxTokens)
+ const abortPromise =
+ signal &&
+ new Promise<never>((_, reject) => {
+ const onAbort = () => reject(createAbortError())
+ signal.addEventListener('abort', onAbort, { once: true })
+ completionPromise.finally(() => signal.removeEventListener('abort', onAbort))
+ })
+
try {
- const llmResponse = await provider.completions(messages, modelId, temperature, maxTokens)
+ const llmResponse = await (abortPromise
+ ? Promise.race([completionPromise, abortPromise])
+ : completionPromise)
response = llmResponse.content
return response
} catch (error) {
+ if (signal?.aborted || (error instanceof Error && error.name === 'AbortError')) {
+ throw error
+ }
console.error('Stream error:', error)
return ''
}
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts b/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
index 9a02e88fb..62272a788 100644
--- a/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
+++ b/src/main/presenter/mcpPresenter/inMemoryServers/builder.ts
@@ -2,21 +2,19 @@ import { ArtifactsServer } from './artifactsServer'
// FileSystemServer has been removed - filesystem capabilities are now provided via Agent tools
import { BochaSearchServer } from './bochaSearchServer'
import { BraveSearchServer } from './braveSearchServer'
-import { ImageServer } from './imageServer'
import { DifyKnowledgeServer } from './difyKnowledgeServer'
import { RagflowKnowledgeServer } from './ragflowKnowledgeServer'
import { FastGptKnowledgeServer } from './fastGptKnowledgeServer'
import { DeepResearchServer } from './deepResearchServer'
import { AutoPromptingServer } from './autoPromptingServer'
import { ConversationSearchServer } from './conversationSearchServer'
-import { MeetingServer } from './meetingServer'
import { BuiltinKnowledgeServer } from './builtinKnowledgeServer'
import { BuiltinKnowledgeConfig } from '@shared/presenter'
import { AppleServer } from './appleServer'
export function getInMemoryServer(
serverName: string,
- args: string[],
+ _args: string[],
env?: Record<string, unknown>
) {
switch (serverName) {
@@ -29,8 +27,6 @@ export function getInMemoryServer(
return new BraveSearchServer(env)
case 'deepResearch':
return new DeepResearchServer(env)
- case 'imageServer':
- return new ImageServer(args[0] || undefined, args[1] || undefined)
case 'difyKnowledge':
return new DifyKnowledgeServer(
env as {
@@ -79,8 +75,6 @@ export function getInMemoryServer(
return new AutoPromptingServer()
case 'deepchat-inmemory/conversation-search-server':
return new ConversationSearchServer()
- case 'deepchat-inmemory/meeting-server':
- return new MeetingServer()
case 'deepchat/apple-server':
// Only create AppleServer on macOS
if (process.platform !== 'darwin') {
diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts
deleted file mode 100644
index 370acb6eb..000000000
--- a/src/main/presenter/mcpPresenter/inMemoryServers/imageServer.ts
+++ /dev/null
@@ -1,479 +0,0 @@
-import { Server } from '@modelcontextprotocol/sdk/server/index.js'
-import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'
-import fs from 'fs/promises'
-import path from 'path'
-import { z } from 'zod'
-import { zodToJsonSchema } from 'zod-to-json-schema'
-import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'
-import { presenter } from '@/presenter'
-import { ChatMessage, ChatMessageContent } from '@shared/presenter'
-// import { GenerateCompletionOptions } from '@/presenter/llmProviderPresenter' // Assuming this path and type exist - using any for now
-
-// --- Zod Schemas for Tool Arguments ---
-
-const ReadImageBase64ArgsSchema = z.object({
- path: z.string().describe('Path to the image file.')
-})
-
-const UploadImageArgsSchema = z.object({
- path: z.string().describe('Path to the image file to upload.')
-})
-
-const ReadMultipleImagesBase64ArgsSchema = z.object({
- paths: z.array(z.string()).describe('List of paths to the image files.')
-})
-
-const UploadMultipleImagesArgsSchema = z.object({
- paths: z.array(z.string()).describe('List of paths to the image files to upload.')
-})
-
-const QueryImageWithPromptArgsSchema = z.object({
- path: z.string().describe('Path to the image file to query.'),
- prompt: z
- .string()
- .describe('The prompt to use when querying the image with the multimodal model.')
-})
-
-const DescribeImageArgsSchema = z.object({
- path: z.string().describe('Path to the image file to do simple describe.')
-})
-
-const OcrImageArgsSchema = z.object({
- path: z.string().describe('Path to the image file for OCR text extraction.')
-})
-
-// --- Image Server Implementation ---
-
-export class ImageServer {
- private server: Server
- private provider: string
- private model: string
-
- constructor(provider?: string, model?: string) {
- const defaultVisionModel = presenter.configPresenter.getDefaultVisionModel()
- this.provider = provider || defaultVisionModel?.providerId || 'openai'
- this.model = model || defaultVisionModel?.modelId || 'gpt-4o'
- this.server = new Server(
- {
- name: 'image-processing-server',
- version: '0.1.0'
- },
- {
- capabilities: {
- tools: {}
- }
- }
- )
- this.setupRequestHandlers()
- }
-
- // No specific initialization needed for now, but can be added for upload service config
- // public async initialize(): Promise<void> {
- // // Initialization logic, e.g., configure upload service client
- // }
-
- private getEffectiveModel(): { provider: string; model: string } {
- if (this.provider && this.model) {
- return { provider: this.provider, model: this.model }
- }
-
- const defaultVisionModel = presenter.configPresenter.getDefaultVisionModel()
- if (defaultVisionModel?.providerId && defaultVisionModel?.modelId) {
- return { provider: defaultVisionModel.providerId, model: defaultVisionModel.modelId }
- }
-
- throw new Error(
- 'No vision model configured. Please set a default vision model in Settings > Common > Default Model.'
- )
- }
-
- public startServer(transport: Transport): void {
- this.server.connect(transport)
- }
-
- // --- Placeholder for Image Upload Logic ---
- private async uploadImageToService(filePath: string, fileBuffer: Buffer): Promise<string> {
- // TODO: Implement actual image upload logic here
- // This might involve using a library like 'axios' or a specific SDK
- // for services like Imgur, AWS S3, Cloudinary, etc.
- console.log(`Uploading ${filePath} (size: ${fileBuffer.length} bytes)...`)
- // Replace with actual upload call
- await new Promise((resolve) => setTimeout(resolve, 500)) // Simulate network delay
- const fakeUrl = `https://fake-upload-service.com/uploads/${path.basename(filePath)}_${Date.now()}`
- console.log(`Upload complete: ${fakeUrl}`)
- return fakeUrl
- }
-
- // --- Placeholder for Multimodal Model Interaction ---
- private async queryImageWithModel(
- filePath: string,
- fileBuffer: Buffer,
- prompt: string
- ): Promise<string> {
- const { provider, model } = this.getEffectiveModel()
- // TODO: Implement actual API call to a multimodal model (e.g., GPT-4o, Gemini)
- console.log(
- `Querying ${filePath} (size: ${fileBuffer.length} bytes) using ${provider}/${model} with prompt: "${prompt}"...`
- )
-
- // Construct the messages array for the multimodal model
- const base64Image = fileBuffer.toString('base64')
- // TODO: Dynamically determine mime type if possible, otherwise assume common type like jpeg
- const dataUrl = `data:image/jpeg;base64,${base64Image}`
-
- const messages: ChatMessage[] = [
- {
- role: 'user',
- content: [
- { type: 'text', text: prompt }, // Use the provided prompt
- {
- type: 'image_url',
- image_url: { url: dataUrl }
- }
- ] as ChatMessageContent[] // Type assertion might be needed depending on ChatMessageContent definition
- }
- ]
-
- const modelConfig = presenter.configPresenter.getModelConfig(model, provider)
-
- try {
- const response = await presenter.llmproviderPresenter.generateCompletionStandalone(
- provider,
- messages,
- model,
- modelConfig?.temperature ?? 0.6,
- modelConfig?.maxTokens || 1000
- )
- console.log(`Model response received: ${response}`)
- return response ?? 'No response generated.' // Handle potential null/undefined response
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error)
- console.error(`Error querying image: ${errorMessage}`)
- // Re-throw or return an error message
- throw new Error(`Failed to query image: ${errorMessage}`)
- // Or return `Error generating response: ${errorMessage}`;
- }
- }
-
- private async ocrImageWithModel(filePath: string, fileBuffer: Buffer): Promise<string> {
- const { provider, model } = this.getEffectiveModel()
- // TODO: Implement actual API call to an OCR service or a multimodal model capable of OCR
- console.log(
- `Requesting OCR for ${filePath} (size: ${fileBuffer.length} bytes) using ${provider}/${model}...`
- )
-
- // Construct the messages array for the multimodal model
- const base64Image = fileBuffer.toString('base64')
- // TODO: Dynamically determine mime type if possible
- const dataUrl = `data:image/jpeg;base64,${base64Image}`
-
- const messages: ChatMessage[] = [
- {
- role: 'user',
- content: [
- { type: 'text', text: 'Perform OCR on this image and return the extracted text.' },
- {
- type: 'image_url',
- image_url: { url: dataUrl }
- }
- ] as ChatMessageContent[] // Type assertion
- }
- ]
-
- console.log(messages)
-
- const modelConfig = presenter.configPresenter.getModelConfig(model, provider)
-
- try {
- const ocrText = await presenter.llmproviderPresenter.generateCompletionStandalone(
- provider,
- messages,
- model,
- modelConfig?.temperature ?? 0.6,
- modelConfig?.maxTokens || 1000
- )
- console.log(`OCR text received: ${ocrText}`)
- return ocrText ?? 'No text extracted.' // Handle potential null/undefined response
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error)
- console.error(`Error performing OCR: ${errorMessage}`)
- // Re-throw or return an error message
- throw new Error(`Failed to perform OCR: ${errorMessage}`)
- // Or return `Error performing OCR: ${errorMessage}`;
- }
- }
-
- // --- Request Handlers ---
-
- private setupRequestHandlers(): void {
- // List Tools Handler
- this.server.setRequestHandler(ListToolsRequestSchema, async () => {
- return {
- tools: [
- {
- name: 'read_image_base64',
- description:
- 'Reads an image file from the specified path and returns its base64 encoded content.',
- inputSchema: zodToJsonSchema(ReadImageBase64ArgsSchema),
- annotations: {
- title: 'Read Image Base64',
- readOnlyHint: true
- }
- },
- {
- name: 'upload_image',
- description:
- 'Uploads an image file from the specified path to a hosting service and returns the public URL.',
- inputSchema: zodToJsonSchema(UploadImageArgsSchema),
- annotations: {
- title: 'Upload Image',
- destructiveHint: false,
- openWorldHint: true
- }
- },
- {
- name: 'read_multiple_images_base64',
- description:
- 'Reads multiple image files from the specified paths and returns their base64 encoded content.',
- inputSchema: zodToJsonSchema(ReadMultipleImagesBase64ArgsSchema),
- annotations: {
- title: 'Read Multiple Images Base64',
- readOnlyHint: true
- }
- },
- {
- name: 'upload_multiple_images',
- description:
- 'Uploads multiple image files from the specified paths to a hosting service and returns their public URLs.',
- inputSchema: zodToJsonSchema(UploadMultipleImagesArgsSchema),
- annotations: {
- title: 'Upload Multiple Images',
- destructiveHint: false,
- openWorldHint: true
- }
- },
- {
- name: 'describe_image',
- description:
- 'Uses a multimodal model to simply describe the image at the specified path.',
- inputSchema: zodToJsonSchema(DescribeImageArgsSchema),
- annotations: {
- title: 'Describe Image',
- readOnlyHint: true,
- openWorldHint: true
- }
- },
- {
- name: 'query_image_with_prompt',
- description:
- 'Uses a multimodal model to answer a query (prompt) about the image at the specified path.',
- inputSchema: zodToJsonSchema(QueryImageWithPromptArgsSchema),
- annotations: {
- title: 'Query Image with Prompt',
- readOnlyHint: true,
- openWorldHint: true
- }
- },
- {
- name: 'ocr_image',
- description:
- 'Performs Optical Character Recognition (OCR) on the image at the specified path and returns the extracted text.',
- inputSchema: zodToJsonSchema(OcrImageArgsSchema),
- annotations: {
- title: 'OCR Image',
- readOnlyHint: true,
- openWorldHint: true
- }
- }
- ]
- }
- })
-
- // Call Tool Handler
- this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
- try {
- const { name, arguments: args } = request.params
-
- switch (name) {
- case 'read_image_base64': {
- const parsed = ReadImageBase64ArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- // TODO: Implement path validation if necessary (similar to FileSystemServer)
- const filePath = parsed.data.path
- const fileBuffer = await fs.readFile(filePath)
- const base64Content = fileBuffer.toString('base64')
- // Determine mime type (optional but good practice)
- // const mimeType = lookup(filePath) || 'application/octet-stream';
- // const dataUri = `data:${mimeType};base64,${base64Content}`;
- return {
- content: [{ type: 'text', text: base64Content }] // Or return dataUri
- }
- }
-
- case 'upload_image': {
- const parsed = UploadImageArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- // TODO: Implement path validation if necessary
- const filePath = parsed.data.path
- const fileBuffer = await fs.readFile(filePath)
- const imageUrl = await this.uploadImageToService(filePath, fileBuffer)
- return {
- content: [{ type: 'text', text: imageUrl }]
- }
- }
-
- case 'read_multiple_images_base64': {
- const parsed = ReadMultipleImagesBase64ArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- const results = await Promise.allSettled(
- parsed.data.paths.map(async (filePath: string) => {
- try {
- // TODO: Implement path validation if necessary
- const fileBuffer = await fs.readFile(filePath)
- return {
- path: filePath,
- base64: fileBuffer.toString('base64'),
- status: 'fulfilled'
- }
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error)
- // Ensure the structure includes path and error for rejected promises
- return Promise.reject({ path: filePath, error: errorMessage })
- }
- })
- )
-
- // Format output: [{path: string, base64?: string, error?: string}]
- const formattedResults = results.map((result) => {
- if (result.status === 'fulfilled') {
- return { path: result.value.path, base64: result.value.base64 }
- } else {
- // Access reason directly as it contains the rejected structure
- return { path: result.reason.path, error: result.reason.error }
- }
- })
-
- return {
- content: [{ type: 'text', text: JSON.stringify(formattedResults, null, 2) }]
- }
- }
-
- case 'upload_multiple_images': {
- const parsed = UploadMultipleImagesArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
-
- const results = await Promise.allSettled(
- parsed.data.paths.map(async (filePath: string) => {
- try {
- // TODO: Implement path validation if necessary
- const fileBuffer = await fs.readFile(filePath)
- const url = await this.uploadImageToService(filePath, fileBuffer)
- return { path: filePath, url: url, status: 'fulfilled' }
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error)
- // Ensure the structure includes path and error for rejected promises
- return Promise.reject({ path: filePath, error: errorMessage })
- }
- })
- )
-
- // Format output: [{path: string, url?: string, error?: string}]
- const formattedResults = results.map((result) => {
- if (result.status === 'fulfilled') {
- return { path: result.value.path, url: result.value.url }
- } else {
- // Access reason directly as it contains the rejected structure
- return { path: result.reason.path, error: result.reason.error }
- }
- })
-
- return {
- content: [{ type: 'text', text: JSON.stringify(formattedResults, null, 2) }]
- }
- }
-
- case 'describe_image': {
- const parsed = DescribeImageArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- // TODO: Implement path validation if necessary
- const filePath = parsed.data.path
- const fileBuffer = await fs.readFile(filePath)
- const description = await this.queryImageWithModel(
- filePath,
- fileBuffer,
- 'Describe this image.'
- )
- return {
- content: [{ type: 'text', text: description }]
- }
- }
-
- case 'query_image_with_prompt': {
- const parsed = QueryImageWithPromptArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- // TODO: Implement path validation if necessary
- const filePath = parsed.data.path
- const prompt = parsed.data.prompt // Get the prompt
- const fileBuffer = await fs.readFile(filePath)
- // Call the renamed function with the prompt
- const response = await this.queryImageWithModel(filePath, fileBuffer, prompt)
- return {
- content: [{ type: 'text', text: response }]
- }
- }
-
- case 'ocr_image': {
- const parsed = OcrImageArgsSchema.safeParse(args)
- if (!parsed.success) {
- throw new Error(`Invalid arguments for ${name}: ${parsed.error}`)
- }
- // TODO: Implement path validation if necessary
- const filePath = parsed.data.path
- const fileBuffer = await fs.readFile(filePath)
- const ocrText = await this.ocrImageWithModel(filePath, fileBuffer)
- return {
- content: [{ type: 'text', text: ocrText }]
- }
- }
-
- default:
- throw new Error(`Unknown tool: ${name}`)
- }
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error)
- // Consider logging the error server-side
- console.error(`Error processing tool call: ${errorMessage}`)
- // Ensure the error response structure matches expected format
- return {
- content: [{ type: 'text', text: `Error: ${errorMessage}` }],
- isError: true // Indicate this is an error response
- }
- }
- })
- }
-}
-
-// --- Usage Example (similar to FileSystemServer) ---
-// import { WebSocketServerTransport } from '@modelcontextprotocol/sdk/transport/node';
-//
-// const imageServer = new ImageServer('your-llm-provider', 'your-multimodal-model');
-// // await imageServer.initialize(); // If initialization is added
-//
-// // Example using WebSocket transport
-// const transport = new WebSocketServerTransport({ port: 8081 }); // Choose a different port
-// imageServer.startServer(transport);
-// console.log('ImageServer started on port 8081');
-
-// You would need a client to connect to this server and call the tools.
diff --git a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts
index 06583badb..8426c987f 100644
--- a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts
+++ b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts
@@ -20,6 +20,7 @@ import {
} from './chatSettingsTools'
import type { AgentToolRuntimePort } from '../runtimePorts'
import { YO_BROWSER_TOOL_NAMES } from '../../browser/YoBrowserToolDefinitions'
+import { resolveSessionVisionTarget } from '../../vision/sessionVisionResolver'
// Consider moving to a shared handlers location in future refactoring
import {
@@ -433,7 +434,7 @@ export class AgentToolManager {
function: {
name: 'read',
description:
- "Read the contents of a file. Supports pagination via offset/limit for large files (auto-truncated at 4500 chars if not specified). When invoked from a skill context with relative paths, provide base_directory as the skill's root directory.",
+ "Read the contents of a file. Supports pagination via offset/limit for large files (auto-truncated at 4500 chars if not specified). For image files, returns an English description of visible content instead of raw pixels. When invoked from a skill context with relative paths, provide base_directory as the skill's root directory.",
parameters: zodToJsonSchema(schemas.read) as {
type: string
properties: Record<string, unknown>
@@ -721,7 +722,7 @@ export class AgentToolManager {
if (this.isImageMimeType(mimeType)) {
return {
- content: await this.readImageWithVisionFallback(validPath, mimeType)
+ content: await this.readImageWithVisionFallback(validPath, mimeType, conversationId)
}
}
@@ -1063,13 +1064,28 @@ export class AgentToolManager {
return lines.join('\n')
}
- private async readImageWithVisionFallback(filePath: string, mimeType: string): Promise<string> {
+ private async readImageWithVisionFallback(
+ filePath: string,
+ mimeType: string,
+ conversationId?: string
+ ): Promise<string> {
const fileBuffer = await fs.promises.readFile(filePath)
const metadata = this.buildImageMetadataBlock(filePath, mimeType, fileBuffer.length)
- const defaultVisionModel = this.configPresenter.getDefaultVisionModel?.()
+ let visionTarget: Awaited<ReturnType<typeof resolveSessionVisionTarget>>
+
+ try {
+ visionTarget = await this.resolveVisionTargetForConversation(conversationId)
+ } catch (error) {
+ logger.warn('[AgentToolManager] Failed to resolve vision target for image read:', {
+ conversationId,
+ filePath,
+ error
+ })
+ throw error
+ }
- if (!defaultVisionModel?.providerId || !defaultVisionModel?.modelId) {
- return `${metadata}\n\nNo defaultVisionModel configured, downgraded to metadata.`
+ if (!visionTarget) {
+ return `${metadata}\n\nImage analysis unavailable because neither the current session model nor the agent vision model can analyze images.`
}
try {
@@ -1080,12 +1096,7 @@ export class AgentToolManager {
content: [
{
type: 'text',
- text: [
- 'Analyze this image and return exactly two sections.',
- 'Section 1 title: OCR',
- 'Section 2 title: Summary',
- 'Keep OCR as faithful extracted text and Summary concise.'
- ].join('\n')
+ text: this.buildImageAnalysisPrompt()
},
{
type: 'image_url',
@@ -1096,28 +1107,61 @@ export class AgentToolManager {
]
const modelConfig = this.configPresenter.getModelConfig(
- defaultVisionModel.modelId,
- defaultVisionModel.providerId
+ visionTarget.modelId,
+ visionTarget.providerId
)
const response = await this.getLlmProviderPresenter().generateCompletionStandalone(
- defaultVisionModel.providerId,
+ visionTarget.providerId,
messages,
- defaultVisionModel.modelId,
+ visionTarget.modelId,
modelConfig?.temperature ?? 0.2,
modelConfig?.maxTokens ?? 1200
)
const normalized = (response || '').trim()
if (!normalized) {
- return `${metadata}\n\nOCR:\n\nSummary:\nNo result returned by vision model.`
+ return `${metadata}\n\nImage analysis returned no usable description.`
}
- return normalized.startsWith('OCR:') ? normalized : `OCR:\n\nSummary:\n${normalized}`
+ return normalized
} catch (error) {
const message = error instanceof Error ? error.message : String(error)
return `${metadata}\n\nVision analysis failed, downgraded to metadata.\nerror: ${message}`
}
}
+ private async resolveVisionTargetForConversation(conversationId?: string) {
+ if (!conversationId) {
+ return null
+ }
+
+ try {
+ const sessionInfo = await this.runtimePort.resolveConversationSessionInfo(conversationId)
+ return await resolveSessionVisionTarget({
+ providerId: sessionInfo?.providerId,
+ modelId: sessionInfo?.modelId,
+ agentId: sessionInfo?.agentId,
+ configPresenter: this.configPresenter,
+ logLabel: `read:${conversationId}`
+ })
+ } catch (error) {
+ if (this.isConversationNotFoundError(error)) {
+ return null
+ }
+
+ throw error
+ }
+ }
+
+ private buildImageAnalysisPrompt(): string {
+ return [
+ 'Analyze this image and respond in English only.',
+ 'Describe only what is clearly visible.',
+ 'Include the main subject, scene or layout, any legible text, UI elements if present, status indicators, warnings, errors, and any detail that matters for understanding the image.',
+ 'Do not speculate about hidden or unreadable content.',
+ 'Return detailed plain text in a single paragraph.'
+ ].join('\n')
+ }
+
private assertWritePermission(
toolName: string,
args: Record<string, unknown>,
diff --git a/src/main/presenter/toolPresenter/index.ts b/src/main/presenter/toolPresenter/index.ts
index 71dcf250c..fc03ca324 100644
--- a/src/main/presenter/toolPresenter/index.ts
+++ b/src/main/presenter/toolPresenter/index.ts
@@ -377,6 +377,11 @@ export class ToolPresenter implements IToolPresenter {
'Use `background: true` when you know a command should detach immediately; otherwise a foreground `exec` may yield a running `sessionId` after `yieldMs`.'
)
}
+ if (toolNames.has('read')) {
+ lines.push(
+ 'When `read` targets an image file, it returns an English description of the visible content and any legible text.'
+ )
+ }
if (toolNames.has('exec') && toolNames.has('read') && toolNames.has('edit')) {
lines.push(
'Recommended file task flow: `exec` for discovery/search -> `read` -> `edit`/`write`.'
diff --git a/src/main/presenter/toolPresenter/runtimePorts.ts b/src/main/presenter/toolPresenter/runtimePorts.ts
index 899540811..806b436c2 100644
--- a/src/main/presenter/toolPresenter/runtimePorts.ts
+++ b/src/main/presenter/toolPresenter/runtimePorts.ts
@@ -6,8 +6,15 @@ import type {
} from '@shared/presenter'
import type { ISkillPresenter } from '@shared/types/skill'
+export interface ConversationSessionInfo {
+ agentId: string
+ providerId: string
+ modelId: string
+}
+
export interface AgentToolRuntimePort {
resolveConversationWorkdir(conversationId: string): Promise<string | null>
+ resolveConversationSessionInfo(conversationId: string): Promise<ConversationSessionInfo | null>
getSkillPresenter(): ISkillPresenter
getYoBrowserToolHandler(): IYoBrowserPresenter['toolHandler']
getFilePresenter(): Pick
diff --git a/src/main/presenter/vision/sessionVisionResolver.ts b/src/main/presenter/vision/sessionVisionResolver.ts
new file mode 100644
index 000000000..3dd075b3e
--- /dev/null
+++ b/src/main/presenter/vision/sessionVisionResolver.ts
@@ -0,0 +1,91 @@
+import type { IConfigPresenter } from '@shared/presenter'
+
+export type SessionVisionTarget = {
+ providerId: string
+ modelId: string
+ source: 'session-model' | 'agent-vision-model'
+}
+
+type SessionVisionResolverParams = {
+ providerId?: string | null
+ modelId?: string | null
+ agentId?: string | null
+ signal?: AbortSignal
+ configPresenter: Pick<
+ IConfigPresenter,
+ 'getModelConfig' | 'resolveDeepChatAgentConfig' | 'isKnownModel'
+ >
+ logLabel?: string
+}
+
+const createAbortError = (): Error => {
+ if (typeof DOMException !== 'undefined') {
+ return new DOMException('Aborted', 'AbortError')
+ }
+
+ const error = new Error('Aborted')
+ error.name = 'AbortError'
+ return error
+}
+
+const throwIfAbortRequested = (signal?: AbortSignal): void => {
+ if (signal?.aborted) {
+ throw createAbortError()
+ }
+}
+
+export async function resolveSessionVisionTarget(
+ params: SessionVisionResolverParams
+ ): Promise<SessionVisionTarget | null> {
+ throwIfAbortRequested(params.signal)
+ const sessionProviderId = params.providerId?.trim()
+ const sessionModelId = params.modelId?.trim()
+ const sessionModelConfig =
+ sessionProviderId && sessionModelId
+ ? params.configPresenter.getModelConfig(sessionModelId, sessionProviderId)
+ : null
+
+ if (
+ sessionProviderId &&
+ sessionModelId &&
+ params.configPresenter.isKnownModel?.(sessionProviderId, sessionModelId) === true &&
+ sessionModelConfig?.vision
+ ) {
+ return {
+ providerId: sessionProviderId,
+ modelId: sessionModelId,
+ source: 'session-model'
+ }
+ }
+
+ const agentId = params.agentId?.trim()
+ if (!agentId) {
+ return null
+ }
+
+ try {
+ throwIfAbortRequested(params.signal)
+ const agentConfig = await params.configPresenter.resolveDeepChatAgentConfig(agentId)
+ throwIfAbortRequested(params.signal)
+ const providerId = agentConfig.visionModel?.providerId?.trim()
+ const modelId = agentConfig.visionModel?.modelId?.trim()
+ if (providerId && modelId) {
+ return {
+ providerId,
+ modelId,
+ source: 'agent-vision-model'
+ }
+ }
+ } catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ throw error
+ }
+ console.warn('[Vision] Failed to resolve agent vision model:', {
+ agentId,
+ context: params.logLabel ?? 'unknown',
+ error
+ })
+ }
+
+ return null
+}
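
The resolution order is: the session's own model wins if it is a known model with the `vision` capability; otherwise the agent's configured `visionModel` is tried; `null` means no vision path exists and callers degrade to metadata-only output. A hedged usage sketch with a stubbed config presenter:

```ts
import { resolveSessionVisionTarget } from './sessionVisionResolver'

// Stub with just the three members the resolver picks off IConfigPresenter;
// the cast is only for this sketch, real callers pass the live presenter.
const stubConfig = {
  getModelConfig: () => ({ vision: false }),
  isKnownModel: () => true,
  resolveDeepChatAgentConfig: async () => ({
    visionModel: { providerId: 'openai', modelId: 'gpt-4o' }
  })
} as never

// The session model lacks vision, so resolution falls through to the agent's
// vision model: { providerId: 'openai', modelId: 'gpt-4o', source: 'agent-vision-model' }.
const target = await resolveSessionVisionTarget({
  providerId: 'openai',
  modelId: 'gpt-4o-mini',
  agentId: 'deepchat',
  configPresenter: stubConfig,
  logLabel: 'example'
})
console.log(target)
```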
diff --git a/src/renderer/settings/components/AcpSettings.vue b/src/renderer/settings/components/AcpSettings.vue
index b90851bb4..80a11e9fe 100644
--- a/src/renderer/settings/components/AcpSettings.vue
+++ b/src/renderer/settings/components/AcpSettings.vue
@@ -75,14 +75,9 @@
{{ t('settings.acp.installedSectionDescription') }}
-
-
- {{ t('settings.acp.installedCount', { count: installedRegistryAgents.length }) }}
-
-
-
+
+ {{ t('settings.acp.installedCount', { count: installedRegistryAgents.length }) }}
+
{{ t('settings.acp.installedEmptyDescription') }}
-
diff --git a/src/renderer/settings/components/common/DefaultModelSettingsSection.vue b/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
index cef7bd378..7a2ef692e 100644
--- a/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
+++ b/src/renderer/settings/components/common/DefaultModelSettingsSection.vue
@@ -68,42 +68,6 @@
-
-
-
- {{
- t('settings.common.defaultModel.visionModel')
- }}
-
-
-
-
-
-
-
-
-
-
-
@@ -119,7 +83,6 @@ import ModelIcon from '@/components/icons/ModelIcon.vue'
import { useThemeStore } from '@/stores/theme'
import { useModelStore } from '@/stores/modelStore'
import { usePresenter } from '@/composables/usePresenter'
-import { ModelType } from '@shared/model'
import type { RENDERER_MODEL_META } from '@shared/presenter'
const { t } = useI18n()
@@ -129,7 +92,6 @@ const configPresenter = usePresenter('configPresenter')
const assistantModelSelectOpen = ref(false)
const chatModelSelectOpen = ref(false)
-const visionModelSelectOpen = ref(false)
interface SelectedModel {
providerId: string
@@ -138,7 +100,6 @@ interface SelectedModel {
const selectedAssistantModel = ref<SelectedModel | null>(null)
const selectedChatModel = ref<SelectedModel | null>(null)
-const selectedVisionModel = ref<SelectedModel | null>(null)
let isSyncingModelDefaults = false
const selectBySetting = (
@@ -164,7 +125,7 @@ const selectBySetting = (
}
const persistModelSetting = async (
- key: 'assistantModel' | 'defaultModel' | 'defaultVisionModel',
+ key: 'assistantModel' | 'defaultModel',
previous: { providerId: string; modelId: string } | undefined,
current: SelectedModel | null
): Promise<void> => {
@@ -198,15 +159,6 @@ const handleChatModelSelect = async (
chatModelSelectOpen.value = false
}
-const handleVisionModelSelect = async (
- model: RENDERER_MODEL_META,
- providerId: string
-): Promise<void> => {
- selectedVisionModel.value = { providerId, model }
- await configPresenter.setSetting('defaultVisionModel', { providerId, modelId: model.id })
- visionModelSelectOpen.value = false
-}
-
const syncModelSelections = async (): Promise<void> => {
if (isSyncingModelDefaults) {
return
@@ -219,9 +171,6 @@ const syncModelSelections = async (): Promise => {
const defaultModelSetting = (await configPresenter.getSetting('defaultModel')) as
| { providerId: string; modelId: string }
| undefined
- const defaultVisionModelSetting = (await configPresenter.getSetting('defaultVisionModel')) as
- | { providerId: string; modelId: string }
- | undefined
const chatSelection = selectBySetting(
defaultModelSetting,
@@ -233,21 +182,11 @@ const syncModelSelections = async (): Promise => {
(_model, providerId) => providerId !== 'acp'
)
- const visionSelection = selectBySetting(
- defaultVisionModelSetting,
- (model, providerId) =>
- providerId !== 'acp' &&
- Boolean(model.vision) &&
- (model.type === ModelType.Chat || model.type === ModelType.ImageGeneration)
- )
-
selectedChatModel.value = chatSelection
selectedAssistantModel.value = assistantSelection
- selectedVisionModel.value = visionSelection
await persistModelSetting('defaultModel', defaultModelSetting, chatSelection)
await persistModelSetting('assistantModel', assistantModelSetting, assistantSelection)
- await persistModelSetting('defaultVisionModel', defaultVisionModelSetting, visionSelection)
} catch (error) {
console.error('Failed to sync model selections:', error)
} finally {
diff --git a/src/renderer/src/components/mcp-config/mcpServerForm.vue b/src/renderer/src/components/mcp-config/mcpServerForm.vue
index 8b1ca56bb..8c5d32923 100644
--- a/src/renderer/src/components/mcp-config/mcpServerForm.vue
+++ b/src/renderer/src/components/mcp-config/mcpServerForm.vue
@@ -18,16 +18,12 @@ import { EmojiPicker } from '@/components/emoji-picker'
import { useToast } from '@/components/use-toast'
import { Icon } from '@iconify/vue'
import { X } from 'lucide-vue-next'
-import ModelIcon from '@/components/icons/ModelIcon.vue'
-import { useModelStore } from '@/stores/modelStore'
import { usePresenter } from '@/composables/usePresenter'
import { nanoid } from 'nanoid'
const { t } = useI18n()
const { toast } = useToast()
-const modelStore = useModelStore()
const devicePresenter = usePresenter('devicePresenter')
-const configPresenter = usePresenter('configPresenter')
const props = defineProps<{
serverName?: string
initialConfig?: MCPServerConfig
@@ -57,14 +53,8 @@ const customHeadersFocused = ref(false)
const customHeadersDisplayValue = ref('')
const npmRegistry = ref(props.initialConfig?.customNpmRegistry || '')
-// For imageServer display only (read-only, sourced from defaultVisionModel)
-const selectedImageModelName = ref('')
-const selectedImageModelProvider = ref('')
-
// Whether the type is inmemory
const isInMemoryType = computed(() => type.value === 'inmemory')
-// Whether this is the imageServer
-const isImageServer = computed(() => isInMemoryType.value && name.value === 'imageServer')
// Whether this is buildInFileSystem
const isBuildInFileSystem = computed(
() => isInMemoryType.value && name.value === 'buildInFileSystem'
@@ -80,32 +70,6 @@ const formatJsonHeaders = (headers: Record): string => {
.map(([key, value]) => `${key}=${value}`)
.join('\n')
}
-const refreshImageServerDefaultModelDisplay = async (): Promise => {
- if (!isImageServer.value) {
- selectedImageModelName.value = ''
- selectedImageModelProvider.value = ''
- return
- }
-
- const defaultVisionModel = (await configPresenter.getSetting('defaultVisionModel')) as
- | { providerId: string; modelId: string }
- | undefined
- if (!defaultVisionModel?.providerId || !defaultVisionModel?.modelId) {
- selectedImageModelName.value = ''
- selectedImageModelProvider.value = ''
- return
- }
-
- selectedImageModelProvider.value = defaultVisionModel.providerId
- const providerEntry = modelStore.enabledModels.find(
- (entry) => entry.providerId === defaultVisionModel.providerId
- )
- const resolvedModel = providerEntry?.models.find(
- (model) => model.id === defaultVisionModel.modelId
- )
- selectedImageModelName.value =
- resolvedModel?.name || `${defaultVisionModel.providerId}/${defaultVisionModel.modelId}`
-}
// Get the localized name and description of built-in servers
const getLocalizedName = computed(() => {
@@ -144,11 +108,9 @@ const jsonConfig = ref('')
const showBaseUrl = computed(() => isRemoteType.value)
// Computed property that controls visibility of the command-related fields
const showCommandFields = computed(() => type.value === 'stdio')
-// Controls args input visibility (stdio, or inmemory that is neither imageServer nor buildInFileSystem)
+// Controls args input visibility (stdio, or inmemory that is not buildInFileSystem)
const showArgsInput = computed(
- () =>
- showCommandFields.value ||
- (isInMemoryType.value && !isImageServer.value && !isBuildInFileSystem.value)
+ () => showCommandFields.value || (isInMemoryType.value && !isBuildInFileSystem.value)
)
// Controls folder picker visibility (buildInFileSystem only)
@@ -253,11 +215,11 @@ const isNameValid = computed(() => name.value.trim().length > 0)
const isCommandValid = computed(() => {
// For SSE types the command is not required
if (isRemoteType.value) return true
- // For STDIO or inmemory types the command is required (excluding built-in servers)
- if (type.value === 'stdio' || (isInMemoryType.value && !isImageServer.value)) {
+ // For STDIO or inmemory types the command is required
+ if (type.value === 'stdio' || isInMemoryType.value) {
return command.value.trim().length > 0
}
- return true // Other cases (such as imageServer) are valid by default
+ return true
})
const isEnvValid = computed(() => {
try {
@@ -473,11 +435,9 @@ const handleSubmit = (): void => {
}
} else {
// STDIO or inmemory type servers
- const normalizedArgs = isImageServer.value
- ? []
- : isBuildInFileSystem.value
- ? foldersList.value.filter((folder) => folder.trim().length > 0)
- : argsRows.value.map((row) => row.value.trim()).filter((value) => value.length > 0)
+ const normalizedArgs = isBuildInFileSystem.value
+ ? foldersList.value.filter((folder) => folder.trim().length > 0)
+ : argsRows.value.map((row) => row.value.trim()).filter((value) => value.length > 0)
serverConfig = {
...baseConfig,
command: command.value.trim(),
@@ -592,15 +552,6 @@ watch(
{ immediate: true }
)
-// imageServer only displays the default vision model and is no longer configured via args
-watch(
- [() => name.value, () => type.value, () => modelStore.enabledModels],
- () => {
- void refreshImageServerDefaultModelDisplay()
- },
- { immediate: true, deep: true }
-)
-
// Watch for initial config changes (primarily for edit mode)
watch(
() => props.initialConfig,
@@ -793,25 +744,6 @@ HTTP-Referer=deepchatai.cn`
/>
-
-
-
-
-
- {{
- selectedImageModelName || t('settings.mcp.serverForm.imageModel')
- }}
-
-
-