From dc60d95d7329278263f2d87f15b8d323fff49f7c Mon Sep 17 00:00:00 2001 From: zerob13 Date: Sun, 29 Mar 2026 21:12:19 +0800 Subject: [PATCH 1/3] feat(remote): add tool interactions --- docs/specs/remote-tool-interactions/plan.md | 71 ++++ docs/specs/remote-tool-interactions/spec.md | 47 +++ package.json | 4 +- .../feishu/feishuClient.ts | 32 +- .../feishu/feishuInteractionPrompt.ts | 136 +++++++ .../feishu/feishuRuntime.ts | 54 ++- .../services/feishuCommandRouter.ts | 187 +++++++++- .../services/remoteBindingStore.ts | 60 ++++ .../services/remoteCommandRouter.ts | 337 +++++++++++++++++- .../services/remoteConversationRunner.ts | 163 ++++++++- .../services/remoteInteraction.ts | 243 +++++++++++++ .../telegram/telegramInteractionPrompt.ts | 168 +++++++++ .../telegram/telegramOutbound.ts | 17 +- .../telegram/telegramPoller.ts | 32 +- .../presenter/remoteControlPresenter/types.ts | 142 ++++++++ .../feishuCommandRouter.test.ts | 190 +++++++++- .../feishuRuntime.test.ts | 123 ++++++- .../remoteBindingStore.test.ts | 22 ++ .../remoteCommandRouter.test.ts | 238 +++++++++++-- .../remoteConversationRunner.test.ts | 232 +++++++++++- .../telegramOutbound.test.ts | 4 +- .../telegramPoller.test.ts | 103 +++++- 22 files changed, 2511 insertions(+), 94 deletions(-) create mode 100644 docs/specs/remote-tool-interactions/plan.md create mode 100644 docs/specs/remote-tool-interactions/spec.md create mode 100644 src/main/presenter/remoteControlPresenter/feishu/feishuInteractionPrompt.ts create mode 100644 src/main/presenter/remoteControlPresenter/services/remoteInteraction.ts create mode 100644 src/main/presenter/remoteControlPresenter/telegram/telegramInteractionPrompt.ts diff --git a/docs/specs/remote-tool-interactions/plan.md b/docs/specs/remote-tool-interactions/plan.md new file mode 100644 index 000000000..cc6d46c3e --- /dev/null +++ b/docs/specs/remote-tool-interactions/plan.md @@ -0,0 +1,71 @@ +# Remote Tool Interactions Plan + +## Summary + +Implement a structured remote 
interaction loop for Telegram and Feishu by extending the runner snapshot model, teaching the routers to pause around pending interactions, and adding channel-specific prompt rendering plus response parsing. + +## Main Process Changes + +- Extend `RemoteConversationSnapshot` and runner status with `pendingInteraction`. +- Parse assistant `tool_call_permission` and `question_request` action blocks into a shared `RemotePendingInteraction` model. +- Add `RemoteConversationRunner.getPendingInteraction()` and `respondToPendingInteraction()` so routers can resolve paused tool interactions without creating a new turn. +- Keep follow-up polling on the same assistant message after a tool interaction response, allowing chained interactions to surface one by one. + +## Router Flow + +- Check for a current pending interaction before routing mutable commands or plain text. +- Allow `/start`, `/help`, `/status`, `/open`, and `/pending` while blocking every other command (such as `/new`, `/use`, `/model`) and unrelated plain-text turns. +- Add `/pending` to both channel command lists and make `/status` report the current waiting interaction summary. +- Parse remote replies into `ToolInteractionResponse`: + - Telegram/Feishu permission: `ALLOW` / `DENY` + - Telegram/Feishu question: option number or exact label + - Custom/plain-text answers when `custom` is allowed or `multiple` is true + +## Telegram Delivery + +- Add callback token state for pending interactions in `RemoteBindingStore`. +- Render permission prompts with inline `Allow` / `Deny`. +- Render single-choice questions with inline option buttons plus `Other` when custom answers are allowed. +- On callback expiry, re-read the current pending interaction and refresh the prompt instead of hard-failing. +- After a callback resolves, edit the interaction message into a resolved state, then continue conversation polling if the agent resumes. + +## Feishu Delivery + +- Add card-style prompt builders for permission and question states. 
+- Extend `FeishuClient` and `FeishuRuntime` with outbound `sendCard` support. +- Prefer card delivery and fall back to plain text if card sending fails. +- Keep all responses text-based from the user side; do not add card-click callbacks. + +## Data Model + +- `RemotePendingInteraction` + - `type` + - `messageId` + - `toolCallId` + - `toolName` + - `toolArgs` + - optional permission metadata + - optional question metadata +- `TelegramPendingInteractionState` + - `endpointKey` + - `messageId` + - `toolCallId` + - `createdAt` +- `FeishuOutboundAction` + - `sendText` + - `sendCard` + +## Risks And Mitigations + +- Stale callback tokens: rebind tokens to the current endpoint/message/tool call and refresh prompts when the interaction still exists. +- Session drift while waiting: block session-switching commands until the current interaction is resolved. +- Card delivery instability in Feishu: fall back to plain text and keep parsing on inbound text only. + +## Test Strategy + +- Runner tests for extracting pending interactions, responding to them, and continuing chained execution. +- Telegram router tests for button/text approval flows, `/pending`, and expired callback refresh. +- Telegram poller tests for sending prompt messages after a completed assistant response with `pendingInteraction`. +- Feishu router tests for permission/question text parsing and `/pending` card prompts. +- Feishu runtime tests for card delivery and card-to-text fallback. +- Binding-store tests for pending interaction token lifecycle. diff --git a/docs/specs/remote-tool-interactions/spec.md b/docs/specs/remote-tool-interactions/spec.md new file mode 100644 index 000000000..864b29e86 --- /dev/null +++ b/docs/specs/remote-tool-interactions/spec.md @@ -0,0 +1,47 @@ +# Remote Tool Interactions + +## Summary + +Extend remote control so Telegram and Feishu can surface structured pending tool interactions instead of collapsing them into a generic desktop-only notice. 
Remote users must be able to resolve permission requests and `user ask` style questions from the chat channel itself, while the desktop app keeps the existing agent execution and permission backends. + +## User Stories + +- As a Telegram remote user, I can approve or deny a tool permission request directly from inline buttons. +- As a Telegram remote user, I can answer a pending question by tapping an option or replying with text when custom input is allowed. +- As a Feishu remote user, I can see a clear card-style prompt for a pending permission or question and reply with a supported text answer. +- As a desktop user, I do not lose remote session continuity when a tool interaction pauses the assistant. +- As a paired remote user, I can ask the bot to re-show the current pending interaction without opening the desktop app. + +## Acceptance Criteria + +- `RemoteConversationSnapshot` includes `pendingInteraction` with structured `permission` or `question` data when the latest assistant message is waiting on user action. +- Remote delivery no longer relies on the generic "Desktop confirmation is required" path as the primary behavior. +- Telegram pending permission prompts render inline `Allow` and `Deny` buttons and also accept `ALLOW` / `DENY` text replies. +- Telegram single-choice question prompts render inline option buttons and an `Other` button when custom answers are allowed. +- Telegram multi-answer questions do not render fake multi-select buttons and instruct the user to reply with plain text. +- Expired Telegram interaction callback tokens refresh the prompt when the underlying pending interaction still exists. +- Feishu pending prompts render as interactive-card style outbound messages when possible and fall back to plain text when card delivery fails. +- Feishu accepts `ALLOW` / `DENY`, option numbers, exact option labels, and custom text according to the pending question shape. +- `/pending` re-sends the current prompt for both Telegram and Feishu. 
+- While a pending interaction exists, `/new`, `/use`, `/model`, and plain new-turn messages are blocked from creating unrelated session state changes. +- `/start`, `/help`, `/status`, `/open`, and `/pending` remain available while a pending interaction exists. +- Existing remote pairing, binding, `/open`, `/status`, and normal non-interaction conversations continue to work. + +## Constraints + +- Keep all logic in Electron main; do not add a new renderer IPC surface for this feature. +- Telegram continues to use callback-query buttons; Feishu does not introduce a public HTTP callback service for card clicks. +- Remote bot copy remains English in this increment. +- Each endpoint only resolves the first pending interaction for its bound session at a time. + +## Non-Goals + +- Feishu clickable approval callbacks. +- Locale negotiation for remote bot messages. +- Arbitrary rich remote workflows beyond permission requests and question requests. + +## Compatibility + +- Existing Telegram and Feishu bindings remain valid. +- Existing remote sessions continue to use `RemoteConversationRunner` and detached session creation. +- Structured pending interaction handling is additive and only changes how remote channels render and answer paused assistant states. 
diff --git a/package.json b/package.json index 16abbc9d9..51aaa9ef0 100644 --- a/package.json +++ b/package.json @@ -73,7 +73,7 @@ "@larksuiteoapi/node-sdk": "^1.60.0", "@modelcontextprotocol/sdk": "^1.28.0", "axios": "^1.13.6", - "better-sqlite3-multiple-ciphers": "12.4.1", + "better-sqlite3-multiple-ciphers": "12.8.0", "cheerio": "^1.2.0", "chokidar": "^5.0.0", "compare-versions": "^6.1.1", @@ -148,7 +148,7 @@ "clsx": "^2.1.1", "cross-env": "^10.1.0", "dayjs": "^1.11.19", - "electron": "^37.10.3", + "electron": "^39.8.5", "electron-builder": "26.0.12", "electron-vite": "^4.0.1", "jsdom": "^26.1.0", diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts index 44c7476e3..cdab7e184 100644 --- a/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts @@ -1,6 +1,6 @@ import * as Lark from '@larksuiteoapi/node-sdk' import type { EventHandles } from '@larksuiteoapi/node-sdk' -import type { FeishuTransportTarget } from '../types' +import type { FeishuInteractiveCardPayload, FeishuTransportTarget } from '../types' const FEISHU_OUTBOUND_TEXT_LIMIT = 8_000 @@ -18,6 +18,8 @@ const createTextPayload = (text: string): string => text }) +const createCardPayload = (card: FeishuInteractiveCardPayload): string => JSON.stringify(card) + const chunkFeishuText = (text: string): string[] => { const normalized = text.trim() || '(No text output)' if (normalized.length <= FEISHU_OUTBOUND_TEXT_LIMIT) { @@ -157,4 +159,32 @@ export class FeishuClient { }) } } + + async sendCard(target: FeishuTransportTarget, card: FeishuInteractiveCardPayload): Promise { + const content = createCardPayload(card) + if (target.replyToMessageId) { + await this.sdk.im.message.reply({ + path: { + message_id: target.replyToMessageId + }, + data: { + content, + msg_type: 'interactive', + reply_in_thread: Boolean(target.threadId) + } + }) + return 
+ } + + await this.sdk.im.message.create({ + params: { + receive_id_type: 'chat_id' + }, + data: { + receive_id: target.chatId, + msg_type: 'interactive', + content + } + }) + } } diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuInteractionPrompt.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuInteractionPrompt.ts new file mode 100644 index 000000000..6b9869bd9 --- /dev/null +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuInteractionPrompt.ts @@ -0,0 +1,136 @@ +import type { FeishuInteractiveCardPayload, RemotePendingInteraction } from '../types' + +const createMarkdownBlock = (content: string): Record => ({ + tag: 'markdown', + content +}) + +const createDivider = (): Record => ({ + tag: 'hr' +}) + +export const buildFeishuPendingInteractionText = ( + interaction: RemotePendingInteraction +): string => { + if (interaction.type === 'permission') { + const permission = interaction.permission + const command = permission?.command || permission?.commandInfo?.command || '' + return [ + 'Permission Required', + permission?.permissionType ? `Type: ${permission.permissionType}` : '', + interaction.toolName ? `Tool: ${interaction.toolName}` : '', + command + ? `Command: ${command}` + : interaction.toolArgs + ? `Arguments: ${interaction.toolArgs}` + : '', + permission?.serverName ? `Server: ${permission.serverName}` : '', + '', + permission?.description?.trim() || '', + '', + 'Reply with ALLOW or DENY.' + ] + .filter(Boolean) + .join('\n') + } + + const question = interaction.question + return [ + 'Question', + question?.header?.trim() || '', + question?.question?.trim() || interaction.toolName || 'Answer required', + '', + ...(question?.options?.map((option, index) => + option.description?.trim() + ? `${index + 1}. ${option.label} - ${option.description.trim()}` + : `${index + 1}. ${option.label}` + ) ?? []), + '', + question?.multiple + ? 'Reply with your answer in plain text.' + : question?.custom !== false + ? 
'Reply with the option number / label / your own answer.' + : 'Reply with the option number or exact label.' + ] + .filter(Boolean) + .join('\n') +} + +export const buildFeishuPendingInteractionCard = ( + interaction: RemotePendingInteraction +): FeishuInteractiveCardPayload => { + const fallbackText = buildFeishuPendingInteractionText(interaction) + const question = interaction.question + const permission = interaction.permission + const command = permission?.command || permission?.commandInfo?.command || '' + + const fields: string[] = [] + if (interaction.type === 'permission' && permission?.permissionType) { + fields.push(`**Type:** ${permission.permissionType}`) + } + if (interaction.toolName) { + fields.push(`**Tool:** ${interaction.toolName}`) + } + if (command) { + fields.push(`**Command:** ${command}`) + } else if (interaction.toolArgs.trim()) { + fields.push(`**Arguments:** ${interaction.toolArgs.trim()}`) + } + if (permission?.serverName) { + fields.push(`**Server:** ${permission.serverName}`) + } + + const instructions = + interaction.type === 'permission' + ? 'Reply with `ALLOW` or `DENY`.' + : question?.multiple + ? 'Reply in plain text with your answer.' + : question?.custom !== false + ? 'Reply with the option number, exact label, or your own answer.' + : 'Reply with the option number or exact label.' + + return { + config: { + wide_screen_mode: true, + enable_forward: true + }, + header: { + title: { + tag: 'plain_text', + content: interaction.type === 'permission' ? 'Permission Required' : 'Question' + }, + template: interaction.type === 'permission' ? 'orange' : 'blue' + }, + elements: [ + ...(fields.length ? [createMarkdownBlock(fields.join('\n'))] : []), + ...(interaction.type === 'permission' && permission?.description?.trim() + ? [createDivider(), createMarkdownBlock(permission.description.trim())] + : []), + ...(interaction.type === 'question' + ? [ + ...(question?.header?.trim() + ? 
[createMarkdownBlock(`**${question.header.trim()}**`)] + : []), + createMarkdownBlock(question?.question?.trim() || 'Answer required'), + ...(question?.options?.length + ? [ + createDivider(), + createMarkdownBlock( + question.options + .map((option, index) => + option.description?.trim() + ? `${index + 1}. ${option.label} - ${option.description.trim()}` + : `${index + 1}. ${option.label}` + ) + .join('\n') + ) + ] + : []) + ] + : []), + createDivider(), + createMarkdownBlock(instructions), + createMarkdownBlock(`\`\`\`\n${fallbackText}\n\`\`\``) + ] + } +} diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts index 1f01d2362..281785a1a 100644 --- a/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts @@ -5,11 +5,16 @@ import { TELEGRAM_STREAM_POLL_INTERVAL_MS, buildFeishuEndpointKey, type FeishuInboundMessage, + type FeishuOutboundAction, type FeishuRuntimeStatusSnapshot, type FeishuTransportTarget } from '../types' import { FeishuCommandRouter } from '../services/feishuCommandRouter' import type { RemoteConversationExecution } from '../services/remoteConversationRunner' +import { + buildFeishuPendingInteractionCard, + buildFeishuPendingInteractionText +} from './feishuInteractionPrompt' import { FeishuClient, type FeishuBotIdentity } from './feishuClient' import { FeishuParser } from './feishuParser' @@ -261,6 +266,10 @@ export class FeishuRuntime { await this.deps.client.sendText(target, reply) } + if (routed.outboundActions?.length) { + await this.dispatchOutboundActions(target, routed.outboundActions, runId) + } + if (routed.conversation) { await this.deliverConversation(target, routed.conversation, runId) } @@ -322,7 +331,23 @@ export class FeishuRuntime { if (!this.isCurrentRun(runId)) { return } - await this.deps.client.sendText(target, snapshot.text) + if (snapshot.text.trim()) { + 
await this.deps.client.sendText(target, snapshot.text) + } + if (snapshot.pendingInteraction) { + await this.dispatchOutboundActions( + target, + [ + { + type: 'sendCard', + card: buildFeishuPendingInteractionCard(snapshot.pendingInteraction), + fallbackText: buildFeishuPendingInteractionText(snapshot.pendingInteraction) + } + ], + runId + ) + return + } return } @@ -350,6 +375,33 @@ export class FeishuRuntime { }) } + private async dispatchOutboundActions( + target: FeishuTransportTarget, + actions: FeishuOutboundAction[], + runId: number + ): Promise { + for (const action of actions) { + if (!this.isCurrentRun(runId)) { + return + } + + if (action.type === 'sendText') { + await this.deps.client.sendText(target, action.text) + continue + } + + try { + await this.deps.client.sendCard(target, action.card) + } catch (error) { + console.warn( + '[FeishuRuntime] Failed to send interactive card, falling back to text:', + error + ) + await this.deps.client.sendText(target, action.fallbackText) + } + } + } + private setStatus( patch: Partial & { state?: FeishuRuntimeStatusSnapshot['state'] diff --git a/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts b/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts index 614d983f9..f340507d9 100644 --- a/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts +++ b/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts @@ -1,19 +1,22 @@ +import type { ToolInteractionResponse } from '@shared/types/agent-interface' import type { SessionWithState } from '@shared/types/agent-interface' -import { - FEISHU_REMOTE_COMMANDS, - buildFeishuBindingMeta, - buildFeishuEndpointKey, - type FeishuInboundMessage, - type FeishuRuntimeStatusSnapshot, - type TelegramModelProviderOption +import type { + FeishuInboundMessage, + FeishuOutboundAction, + FeishuRuntimeStatusSnapshot, + RemotePendingInteraction, + TelegramModelProviderOption } from '../types' +import { 
FEISHU_REMOTE_COMMANDS, buildFeishuBindingMeta, buildFeishuEndpointKey } from '../types' import type { RemoteConversationExecution } from './remoteConversationRunner' +import { buildFeishuPendingInteractionCard } from '../feishu/feishuInteractionPrompt' import { FeishuAuthGuard } from './feishuAuthGuard' import { RemoteBindingStore } from './remoteBindingStore' import { RemoteConversationRunner } from './remoteConversationRunner' export interface FeishuCommandRouteResult { replies: string[] + outboundActions?: FeishuOutboundAction[] conversation?: RemoteConversationExecution } @@ -24,6 +27,8 @@ type FeishuCommandRouterDeps = { getRuntimeStatus: () => FeishuRuntimeStatusSnapshot } +const FEISHU_PENDING_ALLOWED_COMMANDS = new Set(['start', 'help', 'status', 'open', 'pending']) + export class FeishuCommandRouter { constructor(private readonly deps: FeishuCommandRouterDeps) {} @@ -75,6 +80,23 @@ export class FeishuCommandRouter { } try { + const pendingInteraction = await this.deps.runner.getPendingInteraction(endpointKey) + if (pendingInteraction) { + if (!command) { + return await this.handlePendingTextResponse(endpointKey, message.text, pendingInteraction) + } + + if (command === 'pending') { + return this.buildPendingPromptResult(pendingInteraction) + } + + if (!FEISHU_PENDING_ALLOWED_COMMANDS.has(command)) { + return { + replies: [this.formatPendingCommandBlockedMessage(pendingInteraction)] + } + } + } + switch (command) { case 'new': { const title = message.command?.args?.trim() @@ -143,6 +165,11 @@ export class FeishuCommandRouter { } } + case 'pending': + return { + replies: ['No pending interaction is waiting.'] + } + case 'model': return await this.handleModelCommand(message, endpointKey) @@ -161,6 +188,7 @@ export class FeishuCommandRouter { `Current agent: ${status.session?.agentId ?? 'none'}`, `Current model: ${status.session?.modelId ?? 'none'}`, `Generating: ${status.isGenerating ? 'yes' : 'no'}`, + `Waiting: ${status.pendingInteraction ? 
this.formatPendingStatus(status.pendingInteraction) : 'none'}`, `Paired users: ${feishuConfig.pairedUserOpenIds.length}`, `Bindings: ${Object.keys(feishuConfig.bindings).length}`, `Last error: ${runtime.lastError ?? 'none'}` @@ -245,6 +273,123 @@ export class FeishuCommandRouter { } } + private async handlePendingTextResponse( + endpointKey: string, + text: string, + interaction: RemotePendingInteraction + ): Promise { + const response = this.resolvePendingTextResponse(text, interaction) + if (!response) { + return { + replies: [this.formatPendingTextReplyHint(interaction)] + } + } + + const result = await this.deps.runner.respondToPendingInteraction(endpointKey, response) + return { + replies: [ + result.waitingForUserMessage + ? 'Reply with your answer in your next message.' + : this.describeInteractionResponse(interaction, response) + ], + ...(result.execution ? { conversation: result.execution } : {}) + } + } + + private buildPendingPromptResult( + interaction: RemotePendingInteraction + ): FeishuCommandRouteResult { + return { + replies: [], + outboundActions: [ + { + type: 'sendCard', + card: buildFeishuPendingInteractionCard(interaction), + fallbackText: this.formatPendingTextReplyHint(interaction) + } + ] + } + } + + private resolvePendingTextResponse( + text: string, + interaction: RemotePendingInteraction + ): ToolInteractionResponse | null { + const normalized = text.trim() + if (!normalized) { + return null + } + + if (interaction.type === 'permission') { + const lowered = normalized.toLowerCase() + if (lowered === 'allow') { + return { kind: 'permission', granted: true } + } + if (lowered === 'deny') { + return { kind: 'permission', granted: false } + } + return null + } + + const question = interaction.question + if (!question) { + return null + } + + if (!question.multiple) { + const optionIndex = Number.parseInt(normalized, 10) + if ( + Number.isInteger(optionIndex) && + optionIndex > 0 && + optionIndex <= question.options.length + ) { + return { + 
kind: 'question_option', + optionLabel: question.options[optionIndex - 1].label + } + } + + const matchedOption = question.options.find( + (option) => + option.label.localeCompare(normalized, undefined, { sensitivity: 'accent' }) === 0 + ) + if (matchedOption) { + return { + kind: 'question_option', + optionLabel: matchedOption.label + } + } + } + + if (question.multiple || question.custom !== false) { + return { + kind: 'question_custom', + answerText: normalized + } + } + + return null + } + + private describeInteractionResponse( + interaction: RemotePendingInteraction, + response: ToolInteractionResponse + ): string { + if (interaction.type === 'permission' && response.kind === 'permission') { + return response.granted ? 'Approved. Continuing...' : 'Denied.' + } + + if (response.kind === 'question_option') { + return `Selected: ${response.optionLabel}` + } + + if (response.kind === 'question_custom') { + return `Answer received: ${response.answerText.trim()}` + } + + return 'Reply with your answer in a new message.' + } + private formatModelOverview( session: SessionWithState, providers: TelegramModelProviderOption[] @@ -264,6 +409,31 @@ export class FeishuCommandRouter { ].join('\n') } + private formatPendingTextReplyHint(interaction: RemotePendingInteraction): string { + if (interaction.type === 'permission') { + return 'Reply with ALLOW or DENY.' + } + + if (interaction.question?.multiple) { + return 'Reply with your answer in plain text.' + } + + if (interaction.question?.custom !== false) { + return 'Reply with an option number, exact label, or your own answer.' + } + + return 'Reply with an option number or exact label.' + } + + private formatPendingCommandBlockedMessage(interaction: RemotePendingInteraction): string { + return `Resolve the pending ${interaction.type} first. 
Send /pending to review it again.` + } + + private formatPendingStatus(interaction: RemotePendingInteraction): string { + const toolLabel = interaction.toolName.trim() || 'unknown tool' + return `${interaction.type} via ${toolLabel}` + } + private formatStartMessage(isAuthorized: boolean): string { if (isAuthorized) { return [ @@ -281,7 +451,8 @@ export class FeishuCommandRouter { private formatHelpMessage(): string { return [ 'DeepChat Feishu Remote commands:', - ...FEISHU_REMOTE_COMMANDS.map((item) => `/${item.command} - ${item.description}`) + ...FEISHU_REMOTE_COMMANDS.map((item) => `/${item.command} - ${item.description}`), + 'Plain text sends to the current bound session unless a tool interaction is waiting.' ].join('\n') } diff --git a/src/main/presenter/remoteControlPresenter/services/remoteBindingStore.ts b/src/main/presenter/remoteControlPresenter/services/remoteBindingStore.ts index d4c8ada4d..1315a8d2a 100644 --- a/src/main/presenter/remoteControlPresenter/services/remoteBindingStore.ts +++ b/src/main/presenter/remoteControlPresenter/services/remoteBindingStore.ts @@ -1,6 +1,7 @@ import type { IConfigPresenter, RemoteChannel } from '@shared/presenter' import { REMOTE_CONTROL_SETTING_KEY, + TELEGRAM_INTERACTION_CALLBACK_TTL_MS, TELEGRAM_MODEL_MENU_TTL_MS, normalizeRemoteControlConfig, createPairCode, @@ -13,7 +14,9 @@ import { type RemoteControlConfig, type RemoteEndpointBinding, type RemoteEndpointBindingMeta, + type RemotePendingInteraction, type TelegramInboundEvent, + type TelegramPendingInteractionState, type TelegramModelMenuState, type TelegramPairingState, type TelegramRemoteRuntimeConfig @@ -23,6 +26,7 @@ export class RemoteBindingStore { private readonly activeEvents = new Map() private readonly sessionSnapshots = new Map() private readonly modelMenuStates = new Map() + private readonly pendingInteractionStates = new Map() constructor(private readonly configPresenter: IConfigPresenter) {} @@ -113,6 +117,7 @@ export class RemoteBindingStore { })) 
this.activeEvents.delete(endpointKey) this.clearModelMenuStatesForEndpoint(endpointKey) + this.clearPendingInteractionStatesForEndpoint(endpointKey) } clearBinding(endpointKey: string): void { @@ -442,6 +447,43 @@ export class RemoteBindingStore { this.modelMenuStates.delete(token) } + createPendingInteractionState( + endpointKey: string, + interaction: Pick + ): string { + this.clearExpiredPendingInteractionStates() + this.clearPendingInteractionStatesForEndpoint(endpointKey) + const token = createTelegramCallbackToken() + this.pendingInteractionStates.set(token, { + endpointKey, + createdAt: Date.now(), + messageId: interaction.messageId, + toolCallId: interaction.toolCallId + }) + return token + } + + getPendingInteractionState(token: string, ttlMs: number = TELEGRAM_INTERACTION_CALLBACK_TTL_MS) { + this.clearExpiredPendingInteractionStates() + const state = this.pendingInteractionStates.get(token) + if (!state) { + return null + } + + if (Date.now() - state.createdAt > ttlMs) { + this.pendingInteractionStates.delete(token) + return null + } + + return { + ...state + } + } + + clearPendingInteractionState(token: string): void { + this.pendingInteractionStates.delete(token) + } + private getChannelBindings(channel: RemoteChannel): Record { const config = this.getChannelConfig(channel) return config.bindings @@ -481,6 +523,7 @@ export class RemoteBindingStore { this.activeEvents.delete(endpointKey) this.sessionSnapshots.delete(endpointKey) this.clearModelMenuStatesForEndpoint(endpointKey) + this.clearPendingInteractionStatesForEndpoint(endpointKey) } private clearExpiredModelMenuStates(): void { @@ -499,4 +542,21 @@ export class RemoteBindingStore { } } } + + private clearExpiredPendingInteractionStates(): void { + const now = Date.now() + for (const [token, state] of this.pendingInteractionStates.entries()) { + if (now - state.createdAt > TELEGRAM_INTERACTION_CALLBACK_TTL_MS) { + this.pendingInteractionStates.delete(token) + } + } + } + + private 
clearPendingInteractionStatesForEndpoint(endpointKey: string): void { + for (const [token, state] of this.pendingInteractionStates.entries()) { + if (state.endpointKey === endpointKey) { + this.pendingInteractionStates.delete(token) + } + } + } } diff --git a/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts b/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts index 1d424c51e..926662464 100644 --- a/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts +++ b/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts @@ -1,4 +1,6 @@ +import type { ToolInteractionResponse } from '@shared/types/agent-interface' import type { + RemotePendingInteraction, TelegramCallbackAnswer, TelegramInboundCallbackQuery, TelegramInboundEvent, @@ -6,17 +8,24 @@ import type { TelegramInlineKeyboardMarkup, TelegramModelProviderOption, TelegramOutboundAction, + TelegramPendingInteractionCallback, TelegramPollerStatusSnapshot } from '../types' import { + TELEGRAM_INTERACTION_CALLBACK_TTL_MS, TELEGRAM_MODEL_MENU_TTL_MS, TELEGRAM_REMOTE_COMMANDS, buildModelMenuBackCallbackData, buildModelMenuCancelCallbackData, buildModelMenuChoiceCallbackData, buildModelMenuProviderCallbackData, - parseModelMenuCallbackData + parseModelMenuCallbackData, + parsePendingInteractionCallbackData } from '../types' +import { + buildTelegramInteractionResolvedText, + buildTelegramPendingInteractionPrompt +} from '../telegram/telegramInteractionPrompt' import type { RemoteConversationExecution } from './remoteConversationRunner' import { RemoteAuthGuard } from './remoteAuthGuard' import { RemoteBindingStore } from './remoteBindingStore' @@ -36,6 +45,8 @@ type RemoteCommandRouterDeps = { getPollerStatus: () => TelegramPollerStatusSnapshot } +const TELEGRAM_PENDING_ALLOWED_COMMANDS = new Set(['start', 'help', 'status', 'open', 'pending']) + export class RemoteCommandRouter { constructor(private readonly deps: RemoteCommandRouterDeps) {} 
@@ -80,6 +91,23 @@ export class RemoteCommandRouter { } try { + const pendingInteraction = await this.deps.runner.getPendingInteraction(endpointKey) + if (pendingInteraction) { + if (!command) { + return await this.handlePendingTextResponse(endpointKey, message.text, pendingInteraction) + } + + if (command === 'pending') { + return this.buildPendingPromptResult(endpointKey, pendingInteraction) + } + + if (!TELEGRAM_PENDING_ALLOWED_COMMANDS.has(command)) { + return { + replies: [this.formatPendingCommandBlockedMessage(pendingInteraction)] + } + } + } + switch (command) { case 'new': { const title = message.command?.args?.trim() @@ -144,6 +172,12 @@ export class RemoteCommandRouter { } } + case 'pending': { + return { + replies: ['No pending interaction is waiting.'] + } + } + case 'model': { const session = await this.deps.runner.getCurrentSession(endpointKey) if (!session) { @@ -193,6 +227,7 @@ export class RemoteCommandRouter { `Current agent: ${status.session?.agentId ?? 'none'}`, `Current model: ${status.session?.modelId ?? 'none'}`, `Generating: ${status.isGenerating ? 'yes' : 'no'}`, + `Waiting: ${status.pendingInteraction ? this.formatPendingStatus(status.pendingInteraction) : 'none'}`, `Allowed users: ${telegramConfig.allowlist.length}`, `Bindings: ${Object.keys(telegramConfig.bindings).length}`, `Last error: ${runtime.lastError ?? 
'none'}` @@ -231,6 +266,22 @@ export class RemoteCommandRouter { } } + const pendingCallback = parsePendingInteractionCallbackData(event.data) + if (pendingCallback) { + return await this.handlePendingCallbackQuery(event, endpointKey, pendingCallback) + } + + const pendingInteraction = await this.deps.runner.getPendingInteraction(endpointKey) + if (pendingInteraction) { + return { + replies: [], + callbackAnswer: { + text: this.formatPendingCommandBlockedMessage(pendingInteraction), + showAlert: true + } + } + } + const callback = parseModelMenuCallbackData(event.data) if (!callback) { return { @@ -358,6 +409,79 @@ export class RemoteCommandRouter { } } + private async handlePendingCallbackQuery( + event: TelegramInboundCallbackQuery, + endpointKey: string, + callback: TelegramPendingInteractionCallback + ): Promise { + const interaction = await this.deps.runner.getPendingInteraction(endpointKey) + const state = this.deps.bindingStore.getPendingInteractionState( + callback.token, + TELEGRAM_INTERACTION_CALLBACK_TTL_MS + ) + + if ( + !interaction || + !state || + state.endpointKey !== endpointKey || + state.messageId !== interaction.messageId || + state.toolCallId !== interaction.toolCallId + ) { + return await this.buildExpiredPendingInteractionResult(event.messageId, endpointKey) + } + + const response = this.resolvePendingCallbackResponse(interaction, callback) + if (!response) { + return await this.buildExpiredPendingInteractionResult(event.messageId, endpointKey) + } + + this.deps.bindingStore.clearPendingInteractionState(callback.token) + + const result = await this.deps.runner.respondToPendingInteraction(endpointKey, response) + return { + replies: [], + outboundActions: [ + { + type: 'editMessageText', + messageId: event.messageId, + text: buildTelegramInteractionResolvedText({ + interaction, + responseText: this.describeInteractionResponse(interaction, response), + waitingForUserMessage: result.waitingForUserMessage + }), + replyMarkup: null + } + ], + 
...(result.execution ? { conversation: result.execution } : {}), + callbackAnswer: { + text: result.waitingForUserMessage ? 'Reply with your answer.' : 'Continuing...' + } + } + } + + private async handlePendingTextResponse( + endpointKey: string, + text: string, + interaction: RemotePendingInteraction + ): Promise { + const response = this.resolvePendingTextResponse(text, interaction) + if (!response) { + return { + replies: [this.formatPendingTextReplyHint(interaction)] + } + } + + const result = await this.deps.runner.respondToPendingInteraction(endpointKey, response) + return { + replies: [ + result.waitingForUserMessage + ? 'Reply with your answer in your next message.' + : this.describeInteractionResponse(interaction, response) + ], + ...(result.execution ? { conversation: result.execution } : {}) + } + } + private buildExpiredMenuResult(messageId: number): RemoteCommandRouteResult { return { replies: [], @@ -376,6 +500,44 @@ export class RemoteCommandRouter { } } + private async buildExpiredPendingInteractionResult( + messageId: number, + endpointKey: string + ): Promise { + const interaction = await this.deps.runner.getPendingInteraction(endpointKey) + if (!interaction) { + return { + replies: [], + outboundActions: [ + { + type: 'editMessageText', + messageId, + text: 'Pending interaction expired. Run /pending if another action is waiting.', + replyMarkup: null + } + ], + callbackAnswer: { + text: 'Pending interaction expired.', + showAlert: true + } + } + } + + const prompt = this.createPendingPromptAction( + endpointKey, + interaction, + 'editMessageText', + messageId + ) + return { + replies: [], + outboundActions: [prompt], + callbackAnswer: { + text: 'Prompt refreshed.' 
+ } + } + } + private buildProviderMenuKeyboard( token: string, providers: TelegramModelProviderOption[] @@ -425,6 +587,177 @@ export class RemoteCommandRouter { } } + private buildPendingPromptResult( + endpointKey: string, + interaction: RemotePendingInteraction + ): RemoteCommandRouteResult { + return { + replies: [], + outboundActions: [this.createPendingPromptAction(endpointKey, interaction, 'sendMessage')] + } + } + + private createPendingPromptAction( + endpointKey: string, + interaction: RemotePendingInteraction, + mode: 'sendMessage' | 'editMessageText', + messageId?: number + ): TelegramOutboundAction { + const token = this.deps.bindingStore.createPendingInteractionState(endpointKey, interaction) + const prompt = buildTelegramPendingInteractionPrompt(interaction, token) + if (mode === 'editMessageText' && typeof messageId === 'number') { + return { + type: 'editMessageText', + messageId, + text: prompt.text, + replyMarkup: prompt.replyMarkup ?? null + } + } + + return { + type: 'sendMessage', + text: prompt.text, + ...(prompt.replyMarkup ? 
{ replyMarkup: prompt.replyMarkup } : {}) + } + } + + private resolvePendingCallbackResponse( + interaction: RemotePendingInteraction, + callback: TelegramPendingInteractionCallback + ): ToolInteractionResponse | null { + if (interaction.type === 'permission') { + if (callback.action === 'allow') { + return { kind: 'permission', granted: true } + } + if (callback.action === 'deny') { + return { kind: 'permission', granted: false } + } + return null + } + + if (callback.action === 'other') { + return { kind: 'question_other' } + } + + if (callback.action !== 'option') { + return null + } + + const option = interaction.question?.options?.[callback.optionIndex] + if (!option) { + return null + } + + return { + kind: 'question_option', + optionLabel: option.label + } + } + + private resolvePendingTextResponse( + text: string, + interaction: RemotePendingInteraction + ): ToolInteractionResponse | null { + const normalized = text.trim() + if (!normalized) { + return null + } + + if (interaction.type === 'permission') { + const lowered = normalized.toLowerCase() + if (lowered === 'allow') { + return { kind: 'permission', granted: true } + } + if (lowered === 'deny') { + return { kind: 'permission', granted: false } + } + return null + } + + const question = interaction.question + if (!question) { + return null + } + + if (!question.multiple) { + // Treat the reply as an option number only when it is made up solely of digits. + // Number.parseInt on its own accepts any numeric prefix, so a free-text answer such + // as "2 apples" or "1.5" would be misread as picking option 2 or 1. A non-numeric + // reply yields NaN here, fails the range check, and falls through to label matching. + const optionByIndex = /^\d+$/.test(normalized) ? Number.parseInt(normalized, 10) : Number.NaN + if (optionByIndex > 0 && optionByIndex <= question.options.length) { + return { + kind: 'question_option', + optionLabel: question.options[optionByIndex - 1].label + } + } + + const matchedOption = question.options.find( + (option) => + option.label.localeCompare(normalized, undefined, { sensitivity: 'accent' }) === 0 + ) + if (matchedOption) { + return { + kind: 'question_option', + optionLabel: matchedOption.label + } + } + } + + if (question.multiple || question.custom !== false) { + return { + kind: 'question_custom', + 
answerText: normalized + } + } + + return null + } + + private describeInteractionResponse( + interaction: RemotePendingInteraction, + response: ToolInteractionResponse + ): string { + if (interaction.type === 'permission' && response.kind === 'permission') { + return response.granted ? 'Approved. Continuing...' : 'Denied.' + } + + if (response.kind === 'question_option') { + return `Selected: ${response.optionLabel}` + } + + if (response.kind === 'question_custom') { + return `Answer received: ${response.answerText.trim()}` + } + + return 'Reply with your answer in a new message.' + } + + private formatPendingTextReplyHint(interaction: RemotePendingInteraction): string { + if (interaction.type === 'permission') { + return 'Reply with ALLOW or DENY, or use /pending to show the buttons again.' + } + + if (interaction.question?.multiple) { + return 'Reply with your answer in plain text.' + } + + if (interaction.question?.custom !== false) { + return 'Reply with an option number, exact label, or your own answer.' + } + + return 'Reply with an option number or exact label, or use /pending to show the buttons again.' + } + + private formatPendingCommandBlockedMessage(interaction: RemotePendingInteraction): string { + return `Resolve the pending ${interaction.type} first. Use /pending to review it again.` + } + + private formatPendingStatus(interaction: RemotePendingInteraction): string { + const toolLabel = interaction.toolName.trim() || 'unknown tool' + return `${interaction.type} via ${toolLabel}` + } + private formatStartMessage(isAuthorized: boolean): string { const statusLine = isAuthorized ? 'Status: paired' @@ -449,7 +782,7 @@ export class RemoteCommandRouter { ? '/use - Bind a listed session' : `/${item.command} - ${item.description}` ), - 'Plain text sends to the current bound session.' + 'Plain text sends to the current bound session unless a tool interaction is waiting.' 
].join('\n') } diff --git a/src/main/presenter/remoteControlPresenter/services/remoteConversationRunner.ts b/src/main/presenter/remoteControlPresenter/services/remoteConversationRunner.ts index 1a1ae6b8a..f9eb5f6f1 100644 --- a/src/main/presenter/remoteControlPresenter/services/remoteConversationRunner.ts +++ b/src/main/presenter/remoteControlPresenter/services/remoteConversationRunner.ts @@ -1,5 +1,9 @@ import { BrowserWindow } from 'electron' -import type { ChatMessageRecord, SessionWithState } from '@shared/types/agent-interface' +import type { + ChatMessageRecord, + SessionWithState, + ToolInteractionResponse +} from '@shared/types/agent-interface' import type { IConfigPresenter, INewAgentPresenter, @@ -11,6 +15,7 @@ import { TELEGRAM_RECENT_SESSION_LIMIT, TELEGRAM_STREAM_POLL_INTERVAL_MS, type RemoteEndpointBindingMeta, + type RemotePendingInteraction, type TelegramModelProviderOption } from '../types' import { @@ -19,6 +24,7 @@ import { safeParseAssistantBlocks } from '../telegram/telegramOutbound' import { RemoteBindingStore } from './remoteBindingStore' +import { collectPendingInteraction } from './remoteInteraction' const sleep = async (ms: number): Promise => { await new Promise((resolve) => setTimeout(resolve, ms)) @@ -28,6 +34,7 @@ export interface RemoteConversationSnapshot { messageId: string | null text: string completed: boolean + pendingInteraction: RemotePendingInteraction | null } export interface RemoteConversationExecution { @@ -40,6 +47,7 @@ export interface RemoteRunnerStatus { session: SessionWithState | null activeEventId: string | null isGenerating: boolean + pendingInteraction: RemotePendingInteraction | null } export type RemoteOpenSessionResult = @@ -67,6 +75,10 @@ type ChatWindowLookupPresenter = ITabPresenter & { getWindowType(windowId: number): 'chat' | 'browser' } +type PendingInteractionDetails = RemotePendingInteraction & { + messageOrderSeq: number +} + export class RemoteConversationRunner { constructor( private readonly deps: 
RemoteConversationRunnerDeps, @@ -213,15 +225,68 @@ export class RemoteConversationRunner { this.bindingStore.rememberActiveEvent(endpointKey, seededMessage.id) } + return this.createExecution(endpointKey, session.id, { + afterOrderSeq: lastOrderSeq, + preferredMessageId: seededMessage?.id ?? null, + ignoreMessageId: previousActiveEventId + }) + } + + async getPendingInteraction(endpointKey: string): Promise { + const session = await this.getCurrentSession(endpointKey) + if (!session) { + return null + } + + const interaction = await this.getCurrentPendingInteractionDetails(session.id) + if (!interaction) { + return null + } + + const { messageOrderSeq: _messageOrderSeq, ...rest } = interaction + return rest + } + + async respondToPendingInteraction( + endpointKey: string, + response: ToolInteractionResponse + ): Promise<{ + waitingForUserMessage: boolean + execution: RemoteConversationExecution | null + }> { + const session = await this.getCurrentSession(endpointKey) + if (!session) { + throw new Error('No bound session. Send a message, /new, or /use first.') + } + + const interaction = await this.getCurrentPendingInteractionDetails(session.id) + if (!interaction) { + throw new Error('No pending interaction was found.') + } + + const result = await this.deps.newAgentPresenter.respondToolInteraction( + session.id, + interaction.messageId, + interaction.toolCallId, + response + ) + + this.bindingStore.clearActiveEvent(endpointKey) + + if (result.waitingForUserMessage) { + return { + waitingForUserMessage: true, + execution: null + } + } + return { - sessionId: session.id, - eventId: seededMessage?.id ?? null, - getSnapshot: async () => - await this.getConversationSnapshot(endpointKey, session.id, { - afterOrderSeq: lastOrderSeq, - preferredMessageId: seededMessage?.id ?? 
null, - ignoreMessageId: previousActiveEventId - }) + waitingForUserMessage: false, + execution: this.createExecution(endpointKey, session.id, { + afterOrderSeq: Math.max(0, interaction.messageOrderSeq - 1), + preferredMessageId: interaction.messageId, + ignoreMessageId: null + }) } } @@ -279,10 +344,13 @@ export class RemoteConversationRunner { return { session: null, activeEventId: null, - isGenerating: false + isGenerating: false, + pendingInteraction: null } } + const pendingInteraction = await this.getCurrentPendingInteractionDetails(session.id) + const activeEventId = this.bindingStore.getActiveEvent(endpointKey) ?? this.deps.deepchatAgentPresenter.getActiveGeneration(session.id)?.eventId ?? @@ -291,7 +359,11 @@ export class RemoteConversationRunner { return { session, activeEventId, - isGenerating: Boolean(activeEventId) || session.status === 'generating' + isGenerating: + !pendingInteraction && (Boolean(activeEventId) || session.status === 'generating'), + pendingInteraction: pendingInteraction + ? this.stripPendingInteractionDetails(pendingInteraction) + : null } } @@ -319,7 +391,8 @@ export class RemoteConversationRunner { return { messageId: null, text: 'The bound session no longer exists.', - completed: true + completed: true, + pendingInteraction: null } } @@ -343,14 +416,21 @@ export class RemoteConversationRunner { return { messageId: null, text: completed ? 'No assistant response was produced.' 
: '', - completed + completed, + pendingInteraction: null } } const blocks = safeParseAssistantBlocks(trackedMessage.content) + const pendingInteraction = collectPendingInteraction( + trackedMessage.id, + trackedMessage.orderSeq, + blocks + ) const completed = - trackedMessage.status !== 'pending' && - (!activeGeneration || activeGeneration.eventId !== trackedMessage.id) + Boolean(pendingInteraction) || + (trackedMessage.status !== 'pending' && + (!activeGeneration || activeGeneration.eventId !== trackedMessage.id)) if (completed) { this.bindingStore.clearActiveEvent(endpointKey) @@ -358,8 +438,15 @@ export class RemoteConversationRunner { return { messageId: trackedMessage.id, - text: completed ? buildTelegramFinalText(blocks) : extractTelegramDraftText(blocks), - completed + text: pendingInteraction + ? extractTelegramDraftText(blocks) + : completed + ? buildTelegramFinalText(blocks) + : extractTelegramDraftText(blocks), + completed, + pendingInteraction: pendingInteraction + ? this.stripPendingInteractionDetails(pendingInteraction) + : null } } @@ -443,6 +530,48 @@ export class RemoteConversationRunner { return assistants.sort((left, right) => right.orderSeq - left.orderSeq)[0] } + private createExecution( + endpointKey: string, + sessionId: string, + tracking: { + afterOrderSeq: number + preferredMessageId: string | null + ignoreMessageId: string | null + } + ): RemoteConversationExecution { + return { + sessionId, + eventId: tracking.preferredMessageId, + getSnapshot: async () => await this.getConversationSnapshot(endpointKey, sessionId, tracking) + } + } + + private async getCurrentPendingInteractionDetails( + sessionId: string + ): Promise { + const messages = await this.deps.newAgentPresenter.getMessages(sessionId) + const assistants = [...messages] + .filter((message) => message.role === 'assistant') + .sort((left, right) => right.orderSeq - left.orderSeq) + + for (const message of assistants) { + const blocks = safeParseAssistantBlocks(message.content) + 
const interaction = collectPendingInteraction(message.id, message.orderSeq, blocks) + if (interaction) { + return interaction + } + } + + return null + } + + private stripPendingInteractionDetails( + interaction: PendingInteractionDetails + ): RemotePendingInteraction { + const { messageOrderSeq: _messageOrderSeq, ...rest } = interaction + return rest + } + private async resolveChatWindow(): Promise { const tabPresenter = this.deps.tabPresenter as ChatWindowLookupPresenter const chatWindows = this.deps.windowPresenter diff --git a/src/main/presenter/remoteControlPresenter/services/remoteInteraction.ts b/src/main/presenter/remoteControlPresenter/services/remoteInteraction.ts new file mode 100644 index 000000000..d63a1d799 --- /dev/null +++ b/src/main/presenter/remoteControlPresenter/services/remoteInteraction.ts @@ -0,0 +1,243 @@ +import type { AssistantMessageBlock, QuestionOption } from '@shared/types/agent-interface' +import type { + RemotePendingInteraction, + RemotePendingInteractionPermission, + RemotePermissionCommandInfo +} from '../types' + +type RemotePendingInteractionWithOrder = RemotePendingInteraction & { + messageOrderSeq: number +} + +const isPermissionType = ( + value: unknown +): value is RemotePendingInteractionPermission['permissionType'] => + value === 'read' || value === 'write' || value === 'all' || value === 'command' + +const parseQuestionOption = (value: unknown): QuestionOption | null => { + if (!value || typeof value !== 'object') { + return null + } + + const candidate = value as { label?: unknown; description?: unknown } + if (typeof candidate.label !== 'string') { + return null + } + + const label = candidate.label.trim() + if (!label) { + return null + } + + if (typeof candidate.description === 'string' && candidate.description.trim()) { + return { + label, + description: candidate.description.trim() + } + } + + return { label } +} + +export const parseQuestionOptions = (raw: unknown): QuestionOption[] => { + if (Array.isArray(raw)) { 
+ return raw + .map((item) => parseQuestionOption(item)) + .filter((item): item is QuestionOption => Boolean(item)) + } + + if (typeof raw === 'string' && raw.trim()) { + try { + const parsed = JSON.parse(raw) as unknown + if (Array.isArray(parsed)) { + return parsed + .map((item) => parseQuestionOption(item)) + .filter((item): item is QuestionOption => Boolean(item)) + } + } catch { + return [] + } + } + + return [] +} + +const parseCommandInfo = (raw: unknown): RemotePermissionCommandInfo | undefined => { + const candidate = + typeof raw === 'string' && raw.trim() + ? (() => { + try { + return JSON.parse(raw) as unknown + } catch { + return null + } + })() + : raw + + if (!candidate || typeof candidate !== 'object' || Array.isArray(candidate)) { + return undefined + } + + const value = candidate as Record + if (typeof value.command !== 'string' || !value.command.trim()) { + return undefined + } + + const riskLevel = + value.riskLevel === 'low' || + value.riskLevel === 'medium' || + value.riskLevel === 'high' || + value.riskLevel === 'critical' + ? value.riskLevel + : 'medium' + + return { + command: value.command.trim(), + riskLevel, + suggestion: typeof value.suggestion === 'string' ? value.suggestion.trim() : '', + ...(typeof value.signature === 'string' && value.signature.trim() + ? { signature: value.signature.trim() } + : {}), + ...(typeof value.baseCommand === 'string' && value.baseCommand.trim() + ? { baseCommand: value.baseCommand.trim() } + : {}) + } +} + +export const parsePermissionPayload = ( + block: AssistantMessageBlock +): RemotePendingInteractionPermission | undefined => { + const rawPayload = block.extra?.permissionRequest + if (typeof rawPayload === 'string' && rawPayload.trim()) { + try { + const parsed = JSON.parse(rawPayload) as Record + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + const commandInfo = parseCommandInfo(parsed.commandInfo) + return { + permissionType: isPermissionType(parsed.permissionType) ? 
parsed.permissionType : 'write', + description: + typeof parsed.description === 'string' && parsed.description.trim() + ? parsed.description + : typeof block.content === 'string' + ? block.content + : '', + ...(typeof parsed.toolName === 'string' && parsed.toolName.trim() + ? { toolName: parsed.toolName.trim() } + : {}), + ...(typeof parsed.serverName === 'string' && parsed.serverName.trim() + ? { serverName: parsed.serverName.trim() } + : {}), + ...(typeof parsed.providerId === 'string' && parsed.providerId.trim() + ? { providerId: parsed.providerId.trim() } + : {}), + ...(typeof parsed.requestId === 'string' && parsed.requestId.trim() + ? { requestId: parsed.requestId.trim() } + : {}), + ...(parsed.rememberable === false ? { rememberable: false } : { rememberable: true }), + ...(typeof parsed.command === 'string' && parsed.command.trim() + ? { command: parsed.command } + : {}), + ...(typeof parsed.commandSignature === 'string' && parsed.commandSignature.trim() + ? { commandSignature: parsed.commandSignature.trim() } + : {}), + ...(Array.isArray(parsed.paths) + ? { + paths: parsed.paths.filter( + (item): item is string => typeof item === 'string' && item.trim().length > 0 + ) + } + : {}), + ...(commandInfo ? { commandInfo } : {}) + } + } + } catch { + // Ignore malformed serialized permission payloads and fall back to block fields. + } + } + + const permissionType = block.extra?.permissionType + const commandInfo = parseCommandInfo(block.extra?.commandInfo) + return { + permissionType: isPermissionType(permissionType) ? permissionType : 'write', + description: typeof block.content === 'string' ? block.content : '', + ...(typeof block.extra?.toolName === 'string' && block.extra.toolName.trim() + ? { toolName: block.extra.toolName.trim() } + : block.tool_call?.name + ? { toolName: block.tool_call.name } + : {}), + ...(typeof block.extra?.serverName === 'string' && block.extra.serverName.trim() + ? 
{ serverName: block.extra.serverName.trim() } + : block.tool_call?.server_name + ? { serverName: block.tool_call.server_name } + : {}), + ...(typeof block.extra?.providerId === 'string' && block.extra.providerId.trim() + ? { providerId: block.extra.providerId.trim() } + : {}), + ...(typeof block.extra?.permissionRequestId === 'string' && + block.extra.permissionRequestId.trim() + ? { requestId: block.extra.permissionRequestId.trim() } + : {}), + ...(block.extra?.rememberable === false ? { rememberable: false } : { rememberable: true }), + ...(commandInfo ? { commandInfo } : {}) + } +} + +export const collectPendingInteraction = ( + messageId: string, + messageOrderSeq: number, + blocks: AssistantMessageBlock[] +): RemotePendingInteractionWithOrder | null => { + for (const block of blocks) { + if ( + block.type !== 'action' || + (block.action_type !== 'tool_call_permission' && block.action_type !== 'question_request') || + block.status !== 'pending' || + block.extra?.needsUserAction === false + ) { + continue + } + + const toolCallId = block.tool_call?.id + if (!toolCallId) { + continue + } + + const base = { + messageId, + messageOrderSeq, + toolCallId, + toolName: block.tool_call?.name || '', + toolArgs: block.tool_call?.params || '', + ...(block.tool_call?.server_name ? { serverName: block.tool_call.server_name } : {}), + ...(block.tool_call?.server_icons ? { serverIcons: block.tool_call.server_icons } : {}), + ...(block.tool_call?.server_description + ? { serverDescription: block.tool_call.server_description } + : {}) + } + + if (block.action_type === 'question_request') { + return { + ...base, + type: 'question', + question: { + header: typeof block.extra?.questionHeader === 'string' ? block.extra.questionHeader : '', + question: + typeof block.extra?.questionText === 'string' + ? 
block.extra.questionText + : block.content || '', + options: parseQuestionOptions(block.extra?.questionOptions), + custom: block.extra?.questionCustom !== false, + multiple: Boolean(block.extra?.questionMultiple) + } + } + } + + return { + ...base, + type: 'permission', + permission: parsePermissionPayload(block) + } + } + + return null +} diff --git a/src/main/presenter/remoteControlPresenter/telegram/telegramInteractionPrompt.ts b/src/main/presenter/remoteControlPresenter/telegram/telegramInteractionPrompt.ts new file mode 100644 index 000000000..df08b5ba3 --- /dev/null +++ b/src/main/presenter/remoteControlPresenter/telegram/telegramInteractionPrompt.ts @@ -0,0 +1,168 @@ +import type { + RemotePendingInteraction, + TelegramInlineKeyboardButton, + TelegramInlineKeyboardMarkup +} from '../types' +import { + buildPendingInteractionAllowCallbackData, + buildPendingInteractionDenyCallbackData, + buildPendingInteractionOptionCallbackData, + buildPendingInteractionOtherCallbackData +} from '../types' + +const chunkButtons = ( + buttons: TelegramInlineKeyboardButton[], + rowSize: number +): TelegramInlineKeyboardButton[][] => { + const rows: TelegramInlineKeyboardButton[][] = [] + for (let index = 0; index < buttons.length; index += rowSize) { + rows.push(buttons.slice(index, index + rowSize)) + } + return rows +} + +const formatPermissionBody = (interaction: RemotePendingInteraction): string => { + const permission = interaction.permission + const lines = ['Permission Required'] + + if (permission?.permissionType) { + lines.push(`Type: ${permission.permissionType}`) + } + + if (interaction.toolName) { + lines.push(`Tool: ${interaction.toolName}`) + } + + const command = permission?.command || permission?.commandInfo?.command || '' + if (command) { + lines.push(`Command: ${command}`) + } else if (interaction.toolArgs.trim()) { + lines.push(`Arguments: ${interaction.toolArgs.trim()}`) + } + + if (permission?.serverName) { + lines.push(`Server: ${permission.serverName}`) 
+ } + + if (permission?.description?.trim()) { + lines.push('') + lines.push(permission.description.trim()) + } + + lines.push('') + lines.push('Tap a button or reply with ALLOW / DENY.') + return lines.join('\n') +} + +const formatQuestionBody = (interaction: RemotePendingInteraction): string => { + const question = interaction.question + const lines = ['Question'] + + if (question?.header?.trim()) { + lines.push(question.header.trim()) + } + + lines.push(question?.question?.trim() || interaction.toolName || 'Answer required') + + if (question?.options?.length) { + lines.push('') + lines.push( + ...question.options.map((option, index) => + option.description?.trim() + ? `${index + 1}. ${option.label} - ${option.description.trim()}` + : `${index + 1}. ${option.label}` + ) + ) + } + + lines.push('') + if (question?.multiple) { + lines.push('Reply with your answer in plain text.') + } else if (question?.custom !== false) { + lines.push('Tap an option, or reply with the option number / label / your own answer.') + } else { + lines.push('Tap an option, or reply with the option number / exact label.') + } + + return lines.join('\n') +} + +export const buildTelegramPendingInteractionPrompt = ( + interaction: RemotePendingInteraction, + token: string +): { + text: string + replyMarkup?: TelegramInlineKeyboardMarkup +} => { + if (interaction.type === 'permission') { + return { + text: formatPermissionBody(interaction), + replyMarkup: { + inline_keyboard: [ + [ + { + text: 'Allow', + callback_data: buildPendingInteractionAllowCallbackData(token) + }, + { + text: 'Deny', + callback_data: buildPendingInteractionDenyCallbackData(token) + } + ] + ] + } + } + } + + const question = interaction.question + if (!question) { + return { + text: formatQuestionBody(interaction) + } + } + + if (question.multiple) { + return { + text: formatQuestionBody(interaction) + } + } + + const optionButtons = chunkButtons( + question.options.map((option, index) => ({ + text: option.label, + 
callback_data: buildPendingInteractionOptionCallbackData(token, index) + })), + 2 + ) + + if (question.custom !== false) { + optionButtons.push([ + { + text: 'Other', + callback_data: buildPendingInteractionOtherCallbackData(token) + } + ]) + } + + return { + text: formatQuestionBody(interaction), + ...(optionButtons.length ? { replyMarkup: { inline_keyboard: optionButtons } } : {}) + } +} + +export const buildTelegramInteractionResolvedText = (params: { + interaction: RemotePendingInteraction + responseText: string + waitingForUserMessage?: boolean +}): string => { + if (params.waitingForUserMessage) { + return 'Reply with your answer in a new message.' + } + + return [ + params.interaction.type === 'permission' ? 'Permission handled.' : 'Answer recorded.', + params.responseText.trim() + ] + .filter(Boolean) + .join('\n') +} diff --git a/src/main/presenter/remoteControlPresenter/telegram/telegramOutbound.ts b/src/main/presenter/remoteControlPresenter/telegram/telegramOutbound.ts index 3ef44ed3d..1a3613fc9 100644 --- a/src/main/presenter/remoteControlPresenter/telegram/telegramOutbound.ts +++ b/src/main/presenter/remoteControlPresenter/telegram/telegramOutbound.ts @@ -2,8 +2,6 @@ import type { AssistantMessageBlock } from '@shared/types/agent-interface' import { TELEGRAM_OUTBOUND_TEXT_LIMIT } from '../types' const EMPTY_TELEGRAM_TEXT = '(No text output)' -const TELEGRAM_DESKTOP_CONFIRMATION_NOTICE = - 'Desktop confirmation is required to continue this action.' 
export const createTelegramDraftId = (): number => Math.max(1, Math.trunc(Math.random() * 2_000_000_000)) @@ -36,14 +34,6 @@ export const safeParseAssistantBlocks = (content: string): AssistantMessageBlock } } -export const blocksRequireDesktopConfirmation = (blocks: AssistantMessageBlock[]): boolean => - blocks.some( - (block) => - block.type === 'action' && - (block.action_type === 'tool_call_permission' || block.action_type === 'question_request') && - block.extra?.needsUserAction !== false - ) - const collectText = ( blocks: AssistantMessageBlock[], predicate: (block: AssistantMessageBlock) => boolean @@ -80,12 +70,7 @@ export const extractTelegramStreamText = (blocks: AssistantMessageBlock[]): stri } export const buildTelegramFinalText = (blocks: AssistantMessageBlock[]): string => { - const text = extractTelegramStreamText(blocks) || EMPTY_TELEGRAM_TEXT - if (!blocksRequireDesktopConfirmation(blocks)) { - return text - } - - return `${text}\n\n${TELEGRAM_DESKTOP_CONFIRMATION_NOTICE}`.trim() + return extractTelegramStreamText(blocks) || EMPTY_TELEGRAM_TEXT } export const chunkTelegramText = ( diff --git a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts index cfac17c8f..0cc309c92 100644 --- a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts +++ b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts @@ -4,6 +4,7 @@ import { TELEGRAM_REMOTE_POLL_TIMEOUT_SEC, TELEGRAM_STREAM_POLL_INTERVAL_MS, TELEGRAM_TYPING_DELAY_MS, + type RemotePendingInteraction, type TelegramOutboundAction, type TelegramPollerStatusSnapshot, type TelegramTransportTarget @@ -11,6 +12,7 @@ import { import { RemoteBindingStore } from '../services/remoteBindingStore' import { RemoteCommandRouter } from '../services/remoteCommandRouter' import { chunkTelegramText, createTelegramDraftId } from './telegramOutbound' +import { buildTelegramPendingInteractionPrompt } from 
'./telegramInteractionPrompt' import { TelegramApiRequestError, TelegramClient, type TelegramRawUpdate } from './telegramClient' import { TelegramParser } from './telegramParser' @@ -269,7 +271,12 @@ export class TelegramPoller { while (!this.stopRequested) { const snapshot = await execution.getSnapshot() if (snapshot.completed) { - await this.sendChunkedMessage(target, snapshot.text) + if (snapshot.text.trim()) { + await this.sendChunkedMessage(target, snapshot.text) + } + if (snapshot.pendingInteraction) { + await this.sendPendingInteractionPrompt(target, snapshot.pendingInteraction) + } return } @@ -298,7 +305,12 @@ export class TelegramPoller { while (!this.stopRequested) { const snapshot = await execution.getSnapshot() if (snapshot.completed) { - await this.sendChunkedMessage(target, snapshot.text) + if (snapshot.text.trim()) { + await this.sendChunkedMessage(target, snapshot.text) + } + if (snapshot.pendingInteraction) { + await this.sendPendingInteractionPrompt(target, snapshot.pendingInteraction) + } return } @@ -325,6 +337,22 @@ export class TelegramPoller { } } + private async sendPendingInteractionPrompt( + target: TelegramTransportTarget, + interaction: RemotePendingInteraction + ): Promise { + const endpointKey = this.deps.bindingStore.getEndpointKey(target) + const token = this.deps.bindingStore.createPendingInteractionState(endpointKey, interaction) + const prompt = buildTelegramPendingInteractionPrompt(interaction, token) + + if (prompt.replyMarkup) { + await this.deps.client.sendMessage(target, prompt.text, prompt.replyMarkup) + return + } + + await this.sendChunkedMessage(target, prompt.text) + } + private async dispatchOutboundActions( target: TelegramTransportTarget, actions: TelegramOutboundAction[] diff --git a/src/main/presenter/remoteControlPresenter/types.ts b/src/main/presenter/remoteControlPresenter/types.ts index 3d2046211..0a3775b66 100644 --- a/src/main/presenter/remoteControlPresenter/types.ts +++ 
b/src/main/presenter/remoteControlPresenter/types.ts @@ -1,5 +1,6 @@ import { z } from 'zod' import type { HookEventName } from '@shared/hooksNotifications' +import type { QuestionOption } from '@shared/types/agent-interface' import type { FeishuPairingSnapshot, FeishuRemoteSettings, @@ -31,6 +32,7 @@ export const TELEGRAM_STREAM_START_TIMEOUT_MS = 8_000 export const TELEGRAM_PRIVATE_THREAD_DEFAULT = 0 export const TELEGRAM_RECENT_SESSION_LIMIT = 10 export const TELEGRAM_MODEL_MENU_TTL_MS = 10 * 60 * 1000 +export const TELEGRAM_INTERACTION_CALLBACK_TTL_MS = 10 * 60 * 1000 export const TELEGRAM_REMOTE_DEFAULT_AGENT_ID = 'deepchat' export const FEISHU_REMOTE_DEFAULT_AGENT_ID = TELEGRAM_REMOTE_DEFAULT_AGENT_ID export const TELEGRAM_REMOTE_REACTION_EMOJI = '🤯' @@ -67,6 +69,10 @@ export const TELEGRAM_REMOTE_COMMANDS = [ command: 'open', description: 'Open the current session on desktop' }, + { + command: 'pending', + description: 'Show the current pending interaction' + }, { command: 'model', description: 'Switch provider and model' @@ -110,6 +116,10 @@ export const FEISHU_REMOTE_COMMANDS = [ command: 'open', description: 'Open the current session on desktop' }, + { + command: 'pending', + description: 'Show the current pending interaction' + }, { command: 'model', description: 'View or switch the current model' @@ -231,6 +241,49 @@ export interface TelegramInlineKeyboardMarkup { inline_keyboard: TelegramInlineKeyboardButton[][] } +export interface RemotePermissionCommandInfo { + command: string + riskLevel: 'low' | 'medium' | 'high' | 'critical' + suggestion: string + signature?: string + baseCommand?: string +} + +export interface RemotePendingInteractionPermission { + permissionType: 'read' | 'write' | 'all' | 'command' + description: string + toolName?: string + serverName?: string + providerId?: string + requestId?: string + rememberable?: boolean + command?: string + commandSignature?: string + paths?: string[] + commandInfo?: RemotePermissionCommandInfo +} + 
+export interface RemotePendingInteractionQuestion { + header?: string + question: string + options: QuestionOption[] + custom: boolean + multiple: boolean +} + +export interface RemotePendingInteraction { + type: 'permission' | 'question' + messageId: string + toolCallId: string + toolName: string + toolArgs: string + serverName?: string + serverIcons?: string + serverDescription?: string + permission?: RemotePendingInteractionPermission + question?: RemotePendingInteractionQuestion +} + export type TelegramOutboundAction = | { type: 'sendMessage' @@ -267,6 +320,13 @@ export interface TelegramModelMenuState { providers: TelegramModelProviderOption[] } +export interface TelegramPendingInteractionState { + endpointKey: string + createdAt: number + messageId: string + toolCallId: string +} + export type TelegramModelMenuCallback = | { action: 'provider' @@ -284,7 +344,44 @@ export type TelegramModelMenuCallback = token: string } +export type TelegramPendingInteractionCallback = + | { + action: 'allow' | 'deny' | 'other' + token: string + } + | { + action: 'option' + token: string + optionIndex: number + } + +export interface FeishuCardConfig { + enable_forward?: boolean + update_multi?: boolean + wide_screen_mode?: boolean +} + +export interface FeishuInteractiveCardPayload { + config?: FeishuCardConfig + header?: Record + elements?: Array> + i18n_elements?: Record>> + card_link?: Record +} + +export type FeishuOutboundAction = + | { + type: 'sendText' + text: string + } + | { + type: 'sendCard' + card: FeishuInteractiveCardPayload + fallbackText: string + } + const TELEGRAM_MODEL_MENU_CALLBACK_PREFIX = 'model' +const TELEGRAM_INTERACTION_CALLBACK_PREFIX = 'pending' const TELEGRAM_ENDPOINT_KEY_REGEX = /^telegram:(-?\d+):(-?\d+)$/ const FEISHU_ENDPOINT_KEY_REGEX = /^feishu:([^:]+):([^:]+)$/ @@ -360,6 +457,51 @@ export const parseModelMenuCallbackData = (data: string): TelegramModelMenuCallb return null } +export const buildPendingInteractionAllowCallbackData = (token: 
string): string => + `${TELEGRAM_INTERACTION_CALLBACK_PREFIX}:${token}:allow` + +export const buildPendingInteractionDenyCallbackData = (token: string): string => + `${TELEGRAM_INTERACTION_CALLBACK_PREFIX}:${token}:deny` + +export const buildPendingInteractionOtherCallbackData = (token: string): string => + `${TELEGRAM_INTERACTION_CALLBACK_PREFIX}:${token}:other` + +export const buildPendingInteractionOptionCallbackData = ( + token: string, + optionIndex: number +): string => `${TELEGRAM_INTERACTION_CALLBACK_PREFIX}:${token}:o:${optionIndex}` + +export const parsePendingInteractionCallbackData = ( + data: string +): TelegramPendingInteractionCallback | null => { + const parts = data.trim().split(':') + if (parts[0] !== TELEGRAM_INTERACTION_CALLBACK_PREFIX || !parts[1]) { + return null + } + + const token = parts[1] + const action = parts[2] + if (action === 'allow' || action === 'deny' || action === 'other') { + return { + action, + token + } + } + + if (action === 'o' && parts[3] !== undefined) { + const optionIndex = Number.parseInt(parts[3], 10) + if (Number.isInteger(optionIndex) && optionIndex >= 0) { + return { + action: 'option', + token, + optionIndex + } + } + } + + return null +} + export interface TelegramPollerStatusSnapshot { state: RemoteRuntimeState lastError: string | null diff --git a/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts b/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts index a651c8555..a4a7db1f4 100644 --- a/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts +++ b/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts @@ -18,6 +18,18 @@ const createMessage = ( ...overrides }) +const createBindingStore = () => ({ + getFeishuConfig: vi.fn().mockReturnValue({ + pairedUserOpenIds: ['ou_123'], + bindings: {} + }) +}) + +const createRunner = (overrides: Record = {}) => ({ + getPendingInteraction: vi.fn().mockResolvedValue(null), + ...overrides +}) + 
describe('FeishuCommandRouter', () => { it('ignores group messages that do not mention the bot', async () => { const router = new FeishuCommandRouter({ @@ -47,7 +59,7 @@ describe('FeishuCommandRouter', () => { }) it('switches models directly from text args', async () => { - const runner = { + const runner = createRunner({ getCurrentSession: vi.fn().mockResolvedValue({ id: 'session-1', title: 'Remote', @@ -67,7 +79,7 @@ describe('FeishuCommandRouter', () => { modelId: 'gpt-5', agentId: 'deepchat' }) - } + }) const router = new FeishuCommandRouter({ authGuard: { ensureAuthorized: vi.fn().mockReturnValue({ @@ -77,12 +89,7 @@ describe('FeishuCommandRouter', () => { pair: vi.fn() } as any, runner: runner as any, - bindingStore: { - getFeishuConfig: vi.fn().mockReturnValue({ - pairedUserOpenIds: ['ou_123'], - bindings: {} - }) - } as any, + bindingStore: createBindingStore() as any, getRuntimeStatus: vi.fn().mockReturnValue({ state: 'running', lastError: null, @@ -114,17 +121,12 @@ describe('FeishuCommandRouter', () => { }), pair: vi.fn() } as any, - runner: { + runner: createRunner({ open: vi.fn().mockResolvedValue({ status: 'windowNotFound' }) - } as any, - bindingStore: { - getFeishuConfig: vi.fn().mockReturnValue({ - pairedUserOpenIds: ['ou_123'], - bindings: {} - }) - } as any, + }) as any, + bindingStore: createBindingStore() as any, getRuntimeStatus: vi.fn().mockReturnValue({ state: 'running', lastError: null, @@ -146,4 +148,160 @@ describe('FeishuCommandRouter', () => { replies: ['Could not find a DeepChat desktop window. 
Open DeepChat and try /open again.'] }) }) + + it('routes pending permission replies before opening a new turn', async () => { + const runner = { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'permission', + messageId: 'assistant-1', + toolCallId: 'tool-1', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + }), + respondToPendingInteraction: vi.fn().mockResolvedValue({ + waitingForUserMessage: false, + execution: { + sessionId: 'session-1', + eventId: 'assistant-1', + getSnapshot: vi.fn() + } + }) + } + const router = new FeishuCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userOpenId: 'ou_123' + }), + pair: vi.fn() + } as any, + runner: runner as any, + bindingStore: createBindingStore() as any, + getRuntimeStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: 'ALLOW' + }) + ) + + expect(runner.respondToPendingInteraction).toHaveBeenCalledWith('feishu:oc_100:root', { + kind: 'permission', + granted: true + }) + expect(result.replies).toEqual(['Approved. 
Continuing...']) + expect(result.conversation).toEqual( + expect.objectContaining({ + sessionId: 'session-1' + }) + ) + }) + + it('re-sends the current pending question as a card action', async () => { + const router = new FeishuCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userOpenId: 'ou_123' + }), + pair: vi.fn() + } as any, + runner: { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'question', + messageId: 'assistant-2', + toolCallId: 'tool-2', + toolName: 'ask_user', + toolArgs: '{}', + question: { + question: 'Pick one', + options: [{ label: 'A' }, { label: 'B' }], + custom: true, + multiple: false + } + }) + } as any, + bindingStore: createBindingStore() as any, + getRuntimeStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: '/pending', + command: { + name: 'pending', + args: '' + } + }) + ) + + expect(result.replies).toEqual([]) + expect(result.outboundActions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: 'sendCard', + fallbackText: expect.stringContaining('option number'), + card: expect.objectContaining({ + header: expect.objectContaining({ + title: expect.objectContaining({ + content: 'Question' + }) + }) + }) + }) + ]) + ) + }) + + it('parses option numbers for pending questions', async () => { + const runner = { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'question', + messageId: 'assistant-3', + toolCallId: 'tool-3', + toolName: 'ask_user', + toolArgs: '{}', + question: { + question: 'Pick one', + options: [{ label: 'Alpha' }, { label: 'Beta' }], + custom: false, + multiple: false + } + }), + respondToPendingInteraction: vi.fn().mockResolvedValue({ + waitingForUserMessage: false, + execution: null + }) + } + const router = new FeishuCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userOpenId: 'ou_123' + }), + pair: vi.fn() + } as any, + runner: runner as any, + 
bindingStore: createBindingStore() as any, + getRuntimeStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: '2' + }) + ) + + expect(runner.respondToPendingInteraction).toHaveBeenCalledWith('feishu:oc_100:root', { + kind: 'question_option', + optionLabel: 'Beta' + }) + expect(result.replies).toEqual(['Selected: Beta']) + }) }) diff --git a/test/main/presenter/remoteControlPresenter/feishuRuntime.test.ts b/test/main/presenter/remoteControlPresenter/feishuRuntime.test.ts index 20f0aaa1a..bf50213b6 100644 --- a/test/main/presenter/remoteControlPresenter/feishuRuntime.test.ts +++ b/test/main/presenter/remoteControlPresenter/feishuRuntime.test.ts @@ -52,7 +52,8 @@ const createHarness = async (options?: { logger?: { error: (...params: unknown[] streamHandlers.push(params.onMessage) }), stop: vi.fn(), - sendText: vi.fn().mockResolvedValue(undefined) + sendText: vi.fn().mockResolvedValue(undefined), + sendCard: vi.fn().mockResolvedValue(undefined) } const parser = { parseEvent: vi.fn((event: { parsed?: FeishuInboundMessage | null }) => event.parsed ?? 
null) @@ -90,6 +91,7 @@ describe('FeishuRuntime', () => { messageId: string | null text: string completed: boolean + pendingInteraction: null }>() const harness = await createHarness() harness.router.handleMessage.mockResolvedValue({ @@ -118,7 +120,8 @@ describe('FeishuRuntime', () => { deferred.resolve({ messageId: 'msg-1', text: 'done', - completed: true + completed: true, + pendingInteraction: null }) await vi.waitFor(() => { @@ -414,6 +417,7 @@ describe('FeishuRuntime', () => { messageId: string | null text: string completed: boolean + pendingInteraction: null }>() const harness = await createHarness() @@ -471,7 +475,8 @@ describe('FeishuRuntime', () => { deferred.resolve({ messageId: 'msg-1', text: 'partial output', - completed: true + completed: true, + pendingInteraction: null }) await vi.waitFor(() => { @@ -501,7 +506,8 @@ describe('FeishuRuntime', () => { getSnapshot: vi.fn().mockResolvedValue({ messageId: null, text: '', - completed: false + completed: false, + pendingInteraction: null }) } }) @@ -538,6 +544,7 @@ describe('FeishuRuntime', () => { messageId: string | null text: string completed: boolean + pendingInteraction: null }>() const harness = await createHarness() @@ -576,7 +583,8 @@ describe('FeishuRuntime', () => { deferred.resolve({ messageId: 'msg-1', text: 'stale conversation output', - completed: true + completed: true, + pendingInteraction: null }) await Promise.resolve() await Promise.resolve() @@ -607,4 +615,109 @@ describe('FeishuRuntime', () => { await harness.runtime.stop() }) + + it('sends pending interaction cards after conversation text completes', async () => { + const harness = await createHarness() + harness.router.handleMessage.mockResolvedValue({ + replies: [], + conversation: { + sessionId: 'session-1', + eventId: 'msg-1', + getSnapshot: vi.fn().mockResolvedValue({ + messageId: 'msg-1', + text: 'Need approval', + completed: true, + pendingInteraction: { + type: 'permission', + messageId: 'msg-1', + toolCallId: 'tool-1', + 
toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + } + }) + } + }) + + await harness.emitMessage({ + parsed: createParsedMessage({ + eventId: 'evt-pending-card', + messageId: 'om-pending-card' + }) + }) + + await vi.waitFor(() => { + expect(harness.client.sendText).toHaveBeenCalledWith( + { + chatId: 'oc_1', + threadId: null, + replyToMessageId: 'om-pending-card' + }, + 'Need approval' + ) + expect(harness.client.sendCard).toHaveBeenCalledWith( + { + chatId: 'oc_1', + threadId: null, + replyToMessageId: 'om-pending-card' + }, + expect.objectContaining({ + header: expect.objectContaining({ + title: expect.objectContaining({ + content: 'Permission Required' + }) + }) + }) + ) + }) + + await harness.runtime.stop() + }) + + it('falls back to text when sending a Feishu card fails', async () => { + const harness = await createHarness() + harness.client.sendCard.mockRejectedValueOnce(new Error('card send failed')) + harness.router.handleMessage.mockResolvedValue({ + replies: [], + outboundActions: [ + { + type: 'sendCard', + card: { + header: { + title: { + tag: 'plain_text', + content: 'Question' + } + } + }, + fallbackText: 'Reply with ALLOW or DENY.' + } + ] + }) + + await harness.emitMessage({ + parsed: createParsedMessage({ + eventId: 'evt-card-fallback', + messageId: 'om-card-fallback' + }) + }) + + await vi.waitFor(() => { + expect(harness.client.sendCard).toHaveBeenCalled() + expect(harness.client.sendText).toHaveBeenCalledWith( + { + chatId: 'oc_1', + threadId: null, + replyToMessageId: 'om-card-fallback' + }, + 'Reply with ALLOW or DENY.' 
+ ) + }) + + await harness.runtime.stop() + }) }) diff --git a/test/main/presenter/remoteControlPresenter/remoteBindingStore.test.ts b/test/main/presenter/remoteControlPresenter/remoteBindingStore.test.ts index 8a197ec2a..35f9e304c 100644 --- a/test/main/presenter/remoteControlPresenter/remoteBindingStore.test.ts +++ b/test/main/presenter/remoteControlPresenter/remoteBindingStore.test.ts @@ -244,6 +244,28 @@ describe('RemoteBindingStore', () => { expect(store.getModelMenuState(token, 10 * 60 * 1000)).toBeNull() }) + it('keeps pending interaction tokens in memory and clears them after rebinding the endpoint', () => { + const configPresenter = createConfigPresenter() + const store = new RemoteBindingStore(configPresenter as any) + + const token = store.createPendingInteractionState('telegram:100:0', { + messageId: 'assistant-1', + toolCallId: 'tool-1' + }) + + expect(store.getPendingInteractionState(token)).toEqual( + expect.objectContaining({ + endpointKey: 'telegram:100:0', + messageId: 'assistant-1', + toolCallId: 'tool-1' + }) + ) + + store.setBinding('telegram:100:0', 'session-2') + + expect(store.getPendingInteractionState(token)).toBeNull() + }) + it('normalizes binding meta channel from the endpoint key', () => { const configPresenter = createConfigPresenter() const store = new RemoteBindingStore(configPresenter as any) diff --git a/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts b/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts index 5d88e663a..5ef4a270c 100644 --- a/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts +++ b/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts @@ -42,7 +42,15 @@ const createBindingStore = () => ({ }), createModelMenuState: vi.fn().mockReturnValue('menu-token'), getModelMenuState: vi.fn(), - clearModelMenuState: vi.fn() + clearModelMenuState: vi.fn(), + createPendingInteractionState: vi.fn().mockReturnValue('pending-token'), + 
getPendingInteractionState: vi.fn(), + clearPendingInteractionState: vi.fn() +}) + +const createRunner = (overrides: Record = {}) => ({ + getPendingInteraction: vi.fn().mockResolvedValue(null), + ...overrides }) describe('RemoteCommandRouter', () => { @@ -86,7 +94,8 @@ describe('RemoteCommandRouter', () => { } const runner = { sendText: vi.fn().mockResolvedValue(conversation), - getDefaultAgentId: vi.fn().mockResolvedValue('deepchat') + getDefaultAgentId: vi.fn().mockResolvedValue('deepchat'), + getPendingInteraction: vi.fn().mockResolvedValue(null) } const bindingStore = createBindingStore() const router = new RemoteCommandRouter({ @@ -116,9 +125,9 @@ describe('RemoteCommandRouter', () => { }) it('returns usage help for an invalid /use command', async () => { - const runner = { + const runner = createRunner({ useSessionByIndex: vi.fn() - } + }) const router = new RemoteCommandRouter({ authGuard: { ensureAuthorized: vi.fn().mockReturnValue({ @@ -161,7 +170,7 @@ describe('RemoteCommandRouter', () => { }), pair: vi.fn() } as any, - runner: { + runner: createRunner({ getDefaultAgentId: vi.fn().mockResolvedValue('deepchat-alt'), getStatus: vi.fn().mockResolvedValue({ session: { @@ -171,9 +180,10 @@ describe('RemoteCommandRouter', () => { modelId: 'gpt-5' }, activeEventId: 'msg-1', - isGenerating: true + isGenerating: true, + pendingInteraction: null }) - } as any, + }) as any, bindingStore: createBindingStore() as any, getPollerStatus: vi.fn().mockReturnValue({ state: 'running', @@ -233,11 +243,11 @@ describe('RemoteCommandRouter', () => { }), pair: vi.fn() } as any, - runner: { + runner: createRunner({ open: vi.fn().mockResolvedValue({ status: 'noSession' }) - } as any, + }) as any, bindingStore: createBindingStore() as any, getPollerStatus: vi.fn() }) @@ -266,11 +276,11 @@ describe('RemoteCommandRouter', () => { }), pair: vi.fn() } as any, - runner: { + runner: createRunner({ open: vi.fn().mockResolvedValue({ status: 'windowNotFound' }) - } as any, + }) as any, 
bindingStore: createBindingStore() as any, getPollerStatus: vi.fn() }) @@ -299,7 +309,7 @@ describe('RemoteCommandRouter', () => { }), pair: vi.fn() } as any, - runner: { + runner: createRunner({ open: vi.fn().mockResolvedValue({ status: 'ok', session: { @@ -307,7 +317,7 @@ describe('RemoteCommandRouter', () => { title: 'Remote chat' } }) - } as any, + }) as any, bindingStore: createBindingStore() as any, getPollerStatus: vi.fn() }) @@ -328,9 +338,9 @@ describe('RemoteCommandRouter', () => { }) it('returns a prompt when /model is used without a bound session', async () => { - const runner = { + const runner = createRunner({ getCurrentSession: vi.fn().mockResolvedValue(null) - } + }) const router = new RemoteCommandRouter({ authGuard: { ensureAuthorized: vi.fn().mockReturnValue({ @@ -360,7 +370,7 @@ describe('RemoteCommandRouter', () => { }) it('creates a provider menu for /model', async () => { - const runner = { + const runner = createRunner({ getCurrentSession: vi.fn().mockResolvedValue({ id: 'session-1', title: 'Remote chat', @@ -379,7 +389,7 @@ describe('RemoteCommandRouter', () => { models: [{ modelId: 'claude-3-5-sonnet', modelName: 'Claude 3.5 Sonnet' }] } ]) - } + }) const bindingStore = createBindingStore() const router = new RemoteCommandRouter({ authGuard: { @@ -441,7 +451,7 @@ describe('RemoteCommandRouter', () => { ] }) - const runner = { + const runner = createRunner({ getCurrentSession: vi.fn().mockResolvedValue({ id: 'session-1', title: 'Remote chat', @@ -454,7 +464,7 @@ describe('RemoteCommandRouter', () => { providerId: 'anthropic', modelId: 'claude-3-5-sonnet' }) - } + }) const router = new RemoteCommandRouter({ authGuard: { ensureAuthorized: vi.fn().mockReturnValue({ @@ -504,7 +514,7 @@ describe('RemoteCommandRouter', () => { }), pair: vi.fn() } as any, - runner: {} as any, + runner: createRunner() as any, bindingStore: bindingStore as any, getPollerStatus: vi.fn() }) @@ -528,4 +538,192 @@ describe('RemoteCommandRouter', () => { } ]) }) + + 
it('routes plain text to a pending permission response before opening a new turn', async () => { + const runner = { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'permission', + messageId: 'assistant-1', + toolCallId: 'tool-1', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + }), + respondToPendingInteraction: vi.fn().mockResolvedValue({ + waitingForUserMessage: false, + execution: { + sessionId: 'session-1', + eventId: 'assistant-1', + getSnapshot: vi.fn() + } + }) + } + const router = new RemoteCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userId: 123 + }), + pair: vi.fn() + } as any, + runner: runner as any, + bindingStore: createBindingStore() as any, + getPollerStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: 'ALLOW' + }) + ) + + expect(runner.respondToPendingInteraction).toHaveBeenCalledWith('telegram:100:0', { + kind: 'permission', + granted: true + }) + expect(result.replies).toEqual(['Approved. 
Continuing...']) + expect(result.conversation).toEqual( + expect.objectContaining({ + sessionId: 'session-1' + }) + ) + }) + + it('re-sends the current pending interaction with buttons', async () => { + const bindingStore = createBindingStore() + const router = new RemoteCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userId: 123 + }), + pair: vi.fn() + } as any, + runner: { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'question', + messageId: 'assistant-2', + toolCallId: 'tool-2', + toolName: 'deepchat_question', + toolArgs: '{}', + question: { + question: 'Pick one', + options: [{ label: 'A' }, { label: 'B' }], + custom: true, + multiple: false + } + }) + } as any, + bindingStore: bindingStore as any, + getPollerStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: '/pending', + command: { + name: 'pending', + args: '' + } + }) + ) + + expect(bindingStore.createPendingInteractionState).toHaveBeenCalledWith('telegram:100:0', { + type: 'question', + messageId: 'assistant-2', + toolCallId: 'tool-2', + toolName: 'deepchat_question', + toolArgs: '{}', + question: { + question: 'Pick one', + options: [{ label: 'A' }, { label: 'B' }], + custom: true, + multiple: false + } + }) + expect(result.outboundActions).toEqual([ + expect.objectContaining({ + type: 'sendMessage', + text: expect.stringContaining('Question'), + replyMarkup: { + inline_keyboard: expect.arrayContaining([ + [ + expect.objectContaining({ + text: 'A' + }), + expect.objectContaining({ + text: 'B' + }) + ] + ]) + } + }) + ]) + }) + + it('refreshes expired pending interaction callbacks with the latest prompt', async () => { + const bindingStore = createBindingStore() + bindingStore.getPendingInteractionState.mockReturnValue(null) + const router = new RemoteCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userId: 123 + }), + pair: vi.fn() + } as any, + runner: { + 
getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'permission', + messageId: 'assistant-3', + toolCallId: 'tool-3', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + }) + } as any, + bindingStore: bindingStore as any, + getPollerStatus: vi.fn() + }) + + const result = await router.handleMessage( + createCallbackQuery({ + data: 'pending:expired-token:allow' + }) + ) + + expect(bindingStore.createPendingInteractionState).toHaveBeenCalledWith('telegram:100:0', { + type: 'permission', + messageId: 'assistant-3', + toolCallId: 'tool-3', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + }) + expect(result.callbackAnswer).toEqual({ + text: 'Prompt refreshed.' + }) + expect(result.outboundActions).toEqual([ + expect.objectContaining({ + type: 'editMessageText', + messageId: 30, + text: expect.stringContaining('Permission Required'), + replyMarkup: expect.objectContaining({ + inline_keyboard: expect.any(Array) + }) + }) + ]) + }) }) diff --git a/test/main/presenter/remoteControlPresenter/remoteConversationRunner.test.ts b/test/main/presenter/remoteControlPresenter/remoteConversationRunner.test.ts index 842200008..e874bf06d 100644 --- a/test/main/presenter/remoteControlPresenter/remoteConversationRunner.test.ts +++ b/test/main/presenter/remoteControlPresenter/remoteConversationRunner.test.ts @@ -388,9 +388,239 @@ describe('RemoteConversationRunner', () => { expect(snapshot).toEqual({ messageId: null, text: 'No assistant response was produced.', - completed: true + completed: true, + pendingInteraction: null }) vi.useRealTimers() }) + + it('extracts the latest pending interaction from assistant action blocks', async () => { + const runner = new RemoteConversationRunner( + { + configPresenter: {} as any, + newAgentPresenter: { + 
getSession: vi.fn().mockResolvedValue(createSession()), + getMessages: vi.fn().mockResolvedValue([ + { + id: 'assistant-1', + role: 'assistant', + orderSeq: 2, + content: JSON.stringify([ + { + type: 'content', + content: 'Need approval before continuing.', + status: 'success', + timestamp: 1 + }, + { + type: 'action', + action_type: 'tool_call_permission', + content: 'Permission requested', + status: 'pending', + timestamp: 2, + tool_call: { + id: 'tool-1', + name: 'shell_command', + params: '{"command":"git push"}' + }, + extra: { + needsUserAction: true, + permissionType: 'command', + permissionRequest: JSON.stringify({ + permissionType: 'command', + description: 'Run git push', + command: 'git push', + commandInfo: { + command: 'git push', + riskLevel: 'high', + suggestion: 'Confirm before pushing.' + } + }) + } + } + ]) + } + ]) + } as any, + deepchatAgentPresenter: {} as any, + windowPresenter: {} as any, + tabPresenter: {} as any, + resolveDefaultAgentId: vi.fn().mockResolvedValue('deepchat') + }, + { + getBinding: vi.fn().mockReturnValue({ + sessionId: 'session-1', + updatedAt: 1 + }) + } as any + ) + + await expect(runner.getPendingInteraction('telegram:100:0')).resolves.toEqual({ + type: 'permission', + messageId: 'assistant-1', + toolCallId: 'tool-1', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + rememberable: true, + command: 'git push', + commandInfo: { + command: 'git push', + riskLevel: 'high', + suggestion: 'Confirm before pushing.' 
+ } + } + }) + }) + + it('creates a follow-up execution after responding to a pending interaction', async () => { + const getMessage = vi + .fn() + .mockResolvedValueOnce({ + id: 'assistant-2', + role: 'assistant', + orderSeq: 5, + content: JSON.stringify([ + { + type: 'action', + action_type: 'tool_call_permission', + content: 'Permission requested', + status: 'pending', + timestamp: 1, + tool_call: { + id: 'tool-2', + name: 'shell_command', + params: '{"command":"git push"}' + }, + extra: { + needsUserAction: true, + permissionType: 'command', + permissionRequest: JSON.stringify({ + permissionType: 'command', + description: 'Run git push', + command: 'git push' + }) + } + } + ]) + }) + .mockResolvedValue({ + id: 'assistant-2', + role: 'assistant', + orderSeq: 5, + status: 'success', + content: JSON.stringify([ + { + type: 'content', + content: 'Push completed.', + status: 'success', + timestamp: 2 + } + ]) + }) + const newAgentPresenter = { + getSession: vi.fn().mockResolvedValue(createSession()), + getMessages: vi + .fn() + .mockResolvedValueOnce([ + { + id: 'assistant-2', + role: 'assistant', + orderSeq: 5, + content: JSON.stringify([ + { + type: 'action', + action_type: 'tool_call_permission', + content: 'Permission requested', + status: 'pending', + timestamp: 1, + tool_call: { + id: 'tool-2', + name: 'shell_command', + params: '{"command":"git push"}' + }, + extra: { + needsUserAction: true, + permissionType: 'command', + permissionRequest: JSON.stringify({ + permissionType: 'command', + description: 'Run git push', + command: 'git push' + }) + } + } + ]) + } + ]) + .mockResolvedValue([ + { + id: 'assistant-2', + role: 'assistant', + orderSeq: 5, + status: 'success', + content: JSON.stringify([ + { + type: 'content', + content: 'Push completed.', + status: 'success', + timestamp: 2 + } + ]) + } + ]), + respondToolInteraction: vi.fn().mockResolvedValue({ + resumed: true, + waitingForUserMessage: false + }), + getMessage + } + const bindingStore = { + 
getBinding: vi.fn().mockReturnValue({ + sessionId: 'session-1', + updatedAt: 1 + }), + clearActiveEvent: vi.fn(), + rememberActiveEvent: vi.fn() + } + const runner = new RemoteConversationRunner( + { + configPresenter: {} as any, + newAgentPresenter: newAgentPresenter as any, + deepchatAgentPresenter: { + getActiveGeneration: vi.fn().mockReturnValue(null) + } as any, + windowPresenter: {} as any, + tabPresenter: {} as any, + resolveDefaultAgentId: vi.fn().mockResolvedValue('deepchat') + }, + bindingStore as any + ) + + const response = await runner.respondToPendingInteraction('telegram:100:0', { + kind: 'permission', + granted: true + }) + + expect(newAgentPresenter.respondToolInteraction).toHaveBeenCalledWith( + 'session-1', + 'assistant-2', + 'tool-2', + { + kind: 'permission', + granted: true + } + ) + expect(response.waitingForUserMessage).toBe(false) + + const snapshot = await response.execution?.getSnapshot() + expect(snapshot).toEqual({ + messageId: 'assistant-2', + text: 'Push completed.', + completed: true, + pendingInteraction: null + }) + }) }) diff --git a/test/main/presenter/remoteControlPresenter/telegramOutbound.test.ts b/test/main/presenter/remoteControlPresenter/telegramOutbound.test.ts index 0006a2b7a..6cfb29f75 100644 --- a/test/main/presenter/remoteControlPresenter/telegramOutbound.test.ts +++ b/test/main/presenter/remoteControlPresenter/telegramOutbound.test.ts @@ -27,7 +27,7 @@ describe('telegramOutbound', () => { ).toBe('Hello\n\nWorld') }) - it('appends desktop confirmation notice for pending approval blocks', () => { + it('keeps pending approval content without appending desktop confirmation notice', () => { const text = buildTelegramFinalText([ { type: 'content', @@ -48,7 +48,7 @@ describe('telegramOutbound', () => { ]) expect(text).toContain('Need your approval') - expect(text).toContain('Desktop confirmation is required') + expect(text).not.toContain('Desktop confirmation is required') }) it('skips drafts for reasoning and action-only 
blocks', () => { diff --git a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts index 46ced8ef6..3fab80ee8 100644 --- a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts +++ b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts @@ -307,7 +307,8 @@ describe('TelegramPoller', () => { getSnapshot: vi.fn().mockResolvedValue({ messageId: 'msg-1', text: 'pong', - completed: true + completed: true, + pendingInteraction: null }) } }) @@ -723,4 +724,104 @@ describe('TelegramPoller', () => { await poller.stop() warnSpy.mockRestore() }) + + it('sends pending interaction prompts after completed conversation output', async () => { + const client = createClient() + client.getUpdates + .mockResolvedValueOnce([ + { + update_id: 1, + message: { + message_id: 20, + chat: { + id: 100, + type: 'private' + }, + from: { + id: 123 + }, + text: 'hello' + } + } + ]) + .mockImplementation(createBlockingUpdates()) + + const poller = new TelegramPoller({ + client: client as any, + parser: { + parseUpdate: vi.fn().mockReturnValue({ + kind: 'message', + updateId: 1, + chatId: 100, + messageThreadId: 0, + messageId: 20, + chatType: 'private', + fromId: 123, + text: 'hello', + command: null + }) + } as any, + router: { + handleMessage: vi.fn().mockResolvedValue({ + replies: [], + conversation: { + sessionId: 'session-1', + eventId: 'msg-1', + getSnapshot: vi.fn().mockResolvedValue({ + messageId: 'msg-1', + text: 'Need approval', + completed: true, + pendingInteraction: { + type: 'permission', + messageId: 'msg-1', + toolCallId: 'tool-1', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + } + }) + } + }) + } as any, + bindingStore: { + getPollOffset: vi.fn().mockReturnValue(0), + setPollOffset: vi.fn(), + getTelegramConfig: vi.fn().mockReturnValue({ + streamMode: 
'draft' + }), + getEndpointKey: vi.fn().mockReturnValue('telegram:100:0'), + createPendingInteractionState: vi.fn().mockReturnValue('pending-token') + } as any + }) + + await poller.start() + + await vi.waitFor(() => { + expect(client.sendMessage).toHaveBeenNthCalledWith( + 1, + { + chatId: 100, + messageThreadId: 0 + }, + 'Need approval' + ) + expect(client.sendMessage).toHaveBeenNthCalledWith( + 2, + { + chatId: 100, + messageThreadId: 0 + }, + expect.stringContaining('Permission Required'), + expect.objectContaining({ + inline_keyboard: expect.any(Array) + }) + ) + }) + + await poller.stop() + }) }) From d7a69576c55ff3773783db1fce072d1488368a75 Mon Sep 17 00:00:00 2001 From: zerob13 Date: Sun, 29 Mar 2026 21:21:39 +0800 Subject: [PATCH 2/3] fix(remote): update tg prompt immediately --- .../services/remoteCommandRouter.ts | 60 +++++++- .../telegram/telegramPoller.ts | 50 +++++-- .../remoteCommandRouter.test.ts | 91 ++++++++++++ .../telegramPoller.test.ts | 133 ++++++++++++++++++ 4 files changed, 316 insertions(+), 18 deletions(-) diff --git a/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts b/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts index 926662464..b32aa0c34 100644 --- a/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts +++ b/src/main/presenter/remoteControlPresenter/services/remoteCommandRouter.ts @@ -36,6 +36,13 @@ export interface RemoteCommandRouteResult { outboundActions?: TelegramOutboundAction[] conversation?: RemoteConversationExecution callbackAnswer?: TelegramCallbackAnswer + deferred?: Promise +} + +export interface RemoteCommandRouteContinuation { + replies?: string[] + outboundActions?: TelegramOutboundAction[] + conversation?: RemoteConversationExecution } type RemoteCommandRouterDeps = { @@ -437,7 +444,7 @@ export class RemoteCommandRouter { this.deps.bindingStore.clearPendingInteractionState(callback.token) - const result = await 
this.deps.runner.respondToPendingInteraction(endpointKey, response) + const waitingForUserMessage = response.kind === 'question_other' return { replies: [], outboundActions: [ @@ -447,15 +454,20 @@ export class RemoteCommandRouter { text: buildTelegramInteractionResolvedText({ interaction, responseText: this.describeInteractionResponse(interaction, response), - waitingForUserMessage: result.waitingForUserMessage + waitingForUserMessage }), replyMarkup: null } ], - ...(result.execution ? { conversation: result.execution } : {}), callbackAnswer: { - text: result.waitingForUserMessage ? 'Reply with your answer.' : 'Continuing...' - } + text: waitingForUserMessage ? 'Reply with your answer.' : 'Continuing...' + }, + deferred: this.buildPendingCallbackContinuation( + endpointKey, + event.messageId, + interaction, + response + ) } } @@ -621,6 +633,44 @@ export class RemoteCommandRouter { } } + private async buildPendingCallbackContinuation( + endpointKey: string, + messageId: number, + interaction: RemotePendingInteraction, + response: ToolInteractionResponse + ): Promise { + try { + const result = await this.deps.runner.respondToPendingInteraction(endpointKey, response) + + if (result.waitingForUserMessage) { + if (response.kind === 'question_other') { + return {} + } + + return { + outboundActions: [ + { + type: 'editMessageText', + messageId, + text: buildTelegramInteractionResolvedText({ + interaction, + responseText: this.describeInteractionResponse(interaction, response), + waitingForUserMessage: true + }), + replyMarkup: null + } + ] + } + } + + return result.execution ? { conversation: result.execution } : {} + } catch (error) { + return { + replies: [error instanceof Error ? 
error.message : String(error)] + } + } + } + private resolvePendingCallbackResponse( interaction: RemotePendingInteraction, callback: TelegramPendingInteractionCallback diff --git a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts index 0cc309c92..91a49a11e 100644 --- a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts +++ b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts @@ -5,12 +5,17 @@ import { TELEGRAM_STREAM_POLL_INTERVAL_MS, TELEGRAM_TYPING_DELAY_MS, type RemotePendingInteraction, + type TelegramInboundMessage, type TelegramOutboundAction, type TelegramPollerStatusSnapshot, type TelegramTransportTarget } from '../types' import { RemoteBindingStore } from '../services/remoteBindingStore' -import { RemoteCommandRouter } from '../services/remoteCommandRouter' +import { + RemoteCommandRouter, + type RemoteCommandRouteContinuation, + type RemoteCommandRouteResult +} from '../services/remoteCommandRouter' import { chunkTelegramText, createTelegramDraftId } from './telegramOutbound' import { buildTelegramPendingInteractionPrompt } from './telegramInteractionPrompt' import { TelegramApiRequestError, TelegramClient, type TelegramRawUpdate } from './telegramClient' @@ -212,7 +217,26 @@ export class TelegramPoller { await callbackAcknowledger.answer(routed.callbackAnswer) } - for (const reply of routed.replies) { + await this.dispatchRouteResult( + target, + routed, + parsed.kind === 'message' && !parsed.command ? parsed : null + ) + + if (routed.deferred) { + const deferred = await routed.deferred + await this.dispatchRouteResult(target, deferred) + } + } + + private async dispatchRouteResult( + target: TelegramTransportTarget, + routed: + | Pick + | RemoteCommandRouteContinuation, + reactionMessage?: TelegramInboundMessage | null + ): Promise { + for (const reply of routed.replies ?? 
[]) { await this.sendChunkedMessage(target, reply) } @@ -220,19 +244,19 @@ export class TelegramPoller { await this.dispatchOutboundActions(target, routed.outboundActions) } - if (routed.conversation) { - const reactionMessage = parsed.kind === 'message' && !parsed.command ? parsed : null + if (!routed.conversation) { + return + } - if (reactionMessage) { - await this.setIncomingReaction(reactionMessage.chatId, reactionMessage.messageId) - } + if (reactionMessage) { + await this.setIncomingReaction(reactionMessage.chatId, reactionMessage.messageId) + } - try { - await this.deliverConversation(target, routed.conversation) - } finally { - if (reactionMessage) { - await this.clearIncomingReaction(reactionMessage.chatId, reactionMessage.messageId) - } + try { + await this.deliverConversation(target, routed.conversation) + } finally { + if (reactionMessage) { + await this.clearIncomingReaction(reactionMessage.chatId, reactionMessage.messageId) } } } diff --git a/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts b/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts index 5ef4a270c..30755b418 100644 --- a/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts +++ b/test/main/presenter/remoteControlPresenter/remoteCommandRouter.test.ts @@ -53,6 +53,20 @@ const createRunner = (overrides: Record = {}) => ({ ...overrides }) +const createDeferred = () => { + let resolve!: (value: T | PromiseLike) => void + let reject!: (reason?: unknown) => void + const promise = new Promise((nextResolve, nextReject) => { + resolve = nextResolve + reject = nextReject + }) + return { + promise, + resolve, + reject + } +} + describe('RemoteCommandRouter', () => { it('returns pairing guidance for unauthorized plain text', async () => { const router = new RemoteCommandRouter({ @@ -726,4 +740,81 @@ describe('RemoteCommandRouter', () => { }) ]) }) + + it('returns pending callback edits immediately before continuation completes', async () => { + 
const bindingStore = createBindingStore() + bindingStore.getPendingInteractionState.mockReturnValue({ + endpointKey: 'telegram:100:0', + createdAt: Date.now(), + messageId: 'assistant-4', + toolCallId: 'tool-4' + }) + const deferred = createDeferred<{ + waitingForUserMessage: boolean + execution: null + }>() + const runner = { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'permission', + messageId: 'assistant-4', + toolCallId: 'tool-4', + toolName: 'shell_command', + toolArgs: '{"command":"git push"}', + permission: { + permissionType: 'command', + description: 'Run git push', + command: 'git push' + } + }), + respondToPendingInteraction: vi.fn().mockReturnValue(deferred.promise) + } + const router = new RemoteCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userId: 123 + }), + pair: vi.fn() + } as any, + runner: runner as any, + bindingStore: bindingStore as any, + getPollerStatus: vi.fn() + }) + + const result = await Promise.race([ + router.handleMessage( + createCallbackQuery({ + data: 'pending:pending-token:allow' + }) + ), + new Promise<'timeout'>((resolve) => setTimeout(() => resolve('timeout'), 25)) + ]) + + expect(result).not.toBe('timeout') + expect(runner.respondToPendingInteraction).toHaveBeenCalledWith('telegram:100:0', { + kind: 'permission', + granted: true + }) + expect(result).toEqual( + expect.objectContaining({ + callbackAnswer: { + text: 'Continuing...' 
+ }, + outboundActions: [ + expect.objectContaining({ + type: 'editMessageText', + messageId: 30, + text: expect.stringContaining('Permission handled.') + }) + ], + deferred: expect.any(Promise) + }) + ) + + deferred.resolve({ + waitingForUserMessage: false, + execution: null + }) + await (result as Exclude).deferred + }) }) diff --git a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts index 3fab80ee8..5f895e95d 100644 --- a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts +++ b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts @@ -30,6 +30,20 @@ const createBlockingUpdates = ) }) +const createDeferred = () => { + let resolve!: (value: T | PromiseLike) => void + let reject!: (reason?: unknown) => void + const promise = new Promise((nextResolve, nextReject) => { + resolve = nextResolve + reject = nextReject + }) + return { + promise, + resolve, + reject + } +} + describe('TelegramPoller', () => { it('reports running while waiting on long polling', async () => { const client = createClient() @@ -824,4 +838,123 @@ describe('TelegramPoller', () => { await poller.stop() }) + + it('edits pending interaction cards before deferred continuation finishes', async () => { + const client = createClient() + const deferred = createDeferred<{ + conversation?: { + sessionId: string + eventId: string + getSnapshot: () => Promise<{ + messageId: string | null + text: string + completed: boolean + pendingInteraction: null + }> + } + }>() + client.getUpdates + .mockResolvedValueOnce([ + { + update_id: 2, + callback_query: { + id: 'callback-1', + from: { + id: 123 + }, + data: 'pending:token:allow', + message: { + message_id: 30, + chat: { + id: 100, + type: 'private' + } + } + } + } + ]) + .mockImplementation(createBlockingUpdates()) + + const poller = new TelegramPoller({ + client: client as any, + parser: { + parseUpdate: vi.fn().mockReturnValue({ + kind: 
'callback_query', + updateId: 2, + chatId: 100, + messageThreadId: 0, + messageId: 30, + chatType: 'private', + fromId: 123, + callbackQueryId: 'callback-1', + data: 'pending:token:allow' + }) + } as any, + router: { + handleMessage: vi.fn().mockResolvedValue({ + replies: [], + outboundActions: [ + { + type: 'editMessageText', + messageId: 30, + text: 'Permission handled.\nApproved. Continuing...', + replyMarkup: null + } + ], + callbackAnswer: { + text: 'Continuing...' + }, + deferred: deferred.promise + }) + } as any, + bindingStore: { + getPollOffset: vi.fn().mockReturnValue(0), + setPollOffset: vi.fn(), + getTelegramConfig: vi.fn().mockReturnValue({ + streamMode: 'draft' + }) + } as any + }) + + await poller.start() + + await vi.waitFor(() => { + expect(client.editMessageText).toHaveBeenCalledWith({ + target: { + chatId: 100, + messageThreadId: 0 + }, + messageId: 30, + text: 'Permission handled.\nApproved. Continuing...', + replyMarkup: undefined + }) + }) + + expect(client.sendMessage).not.toHaveBeenCalled() + + deferred.resolve({ + conversation: { + sessionId: 'session-1', + eventId: 'msg-1', + getSnapshot: vi.fn().mockResolvedValue({ + messageId: 'msg-1', + text: 'Done', + completed: true, + pendingInteraction: null + }) + } + }) + + await vi.waitFor(() => { + expect(client.sendMessage).toHaveBeenCalledWith( + { + chatId: 100, + messageThreadId: 0 + }, + 'Done' + ) + }) + + await poller.stop() + }) }) From 8fa5d930795e3dc972c33e403b5d674c9a84c8b3 Mon Sep 17 00:00:00 2001 From: zerob13 Date: Sun, 29 Mar 2026 21:53:30 +0800 Subject: [PATCH 3/3] fix(remote): refine remote interactions --- docs/specs/remote-tool-interactions/plan.md | 154 ++++++++++++------ docs/specs/remote-tool-interactions/tasks.md | 103 ++++++++++++ .../services/feishuCommandRouter.ts | 23 +-- .../telegram/telegramPoller.ts | 89 +++++++--- .../feishuCommandRouter.test.ts | 48 +++++- .../telegramPoller.test.ts | 91 +++++++++++ 6 files changed, 425 insertions(+), 83 deletions(-) create mode 
100644 docs/specs/remote-tool-interactions/tasks.md diff --git a/docs/specs/remote-tool-interactions/plan.md b/docs/specs/remote-tool-interactions/plan.md index cc6d46c3e..9c758ec0d 100644 --- a/docs/specs/remote-tool-interactions/plan.md +++ b/docs/specs/remote-tool-interactions/plan.md @@ -2,70 +2,124 @@ ## Summary -Implement a structured remote interaction loop for Telegram and Feishu by extending the runner snapshot model, teaching the routers to pause around pending interactions, and adding channel-specific prompt rendering plus response parsing. +Implement a structured remote interaction loop for Telegram and Feishu so remote endpoints can resolve paused permission and question interactions without falling back to a generic desktop-only notice. The feature stays inside Electron main and reuses the existing `RemoteConversationRunner`, `RemoteCommandRouter`, `FeishuCommandRouter`, and `newAgentPresenter.respondToolInteraction(...)` flow. -## Main Process Changes +## Goals -- Extend `RemoteConversationSnapshot` and runner status with `pendingInteraction`. -- Parse assistant `tool_call_permission` and `question_request` action blocks into a shared `RemotePendingInteraction` model. -- Add `RemoteConversationRunner.getPendingInteraction()` and `respondToPendingInteraction()` so routers can resolve paused tool interactions without creating a new turn. -- Keep follow-up polling on the same assistant message after a tool interaction response, allowing chained interactions to surface one by one. +- Expose `RemoteConversationSnapshot.pendingInteraction` as the canonical paused-interaction state for remote delivery. +- Preserve the current detached-session and bound-endpoint model without adding renderer IPC. +- Let Telegram resolve interactions with inline buttons plus text fallback. +- Let Feishu render interaction cards and fall back to complete plain-text prompts when card delivery fails. +- Keep command/session state safe while an interaction is unresolved. 
-## Router Flow +## Readiness -- Check for a current pending interaction before routing mutable commands or plain text. -- Allow `/help`, `/status`, `/open`, and `/pending` while blocking `/new`, `/use`, `/model`, and unrelated plain-text turns. -- Add `/pending` to both channel command lists and make `/status` report the current waiting interaction summary. -- Parse remote replies into `ToolInteractionResponse`: - - Telegram/Feishu permission: `ALLOW` / `DENY` - - Telegram/Feishu question: option number or exact label - - Custom/plain-text answers when `custom` is allowed or `multiple` is true +- No open clarification items remain. +- The feature is ready for implementation and regression verification. -## Telegram Delivery +## Rollout Steps -- Add callback token state for pending interactions in `RemoteBindingStore`. -- Render permission prompts with inline `Allow` / `Deny`. -- Render single-choice questions with inline option buttons plus `Other` when custom answers are allowed. -- On callback expiry, re-read the current pending interaction and refresh the prompt instead of hard-failing. -- After a callback resolves, edit the interaction message into a resolved state, then continue conversation polling if the agent resumes. +1. Extend remote snapshot and runner contracts to surface `pendingInteraction`. +2. Parse assistant `tool_call_permission` and `question_request` blocks into a shared `RemotePendingInteraction` model. +3. Gate remote command routing around pending interactions and add `/pending`. +4. Add Telegram-specific rendering, callback token state, callback refresh, and text fallback. +5. Add Feishu-specific card rendering, text fallback, and inbound text parsing. +6. Add regression coverage for runner extraction, callback refresh, prompt resend, and channel-specific prompt delivery. +7. Update spec artifacts so acceptance, rollout, and compatibility are reviewable without tracing code. 
-## Feishu Delivery +## Dependencies -- Add card-style prompt builders for permission and question states. -- Extend `FeishuClient` and `FeishuRuntime` with outbound `sendCard` support. -- Prefer card delivery and fall back to plain text if card sending fails. -- Keep all responses text-based from the user side; do not add card-click callbacks. +- `RemoteConversationSnapshot.pendingInteraction` in `RemoteConversationRunner` +- `newAgentPresenter.respondToolInteraction(...)` +- Existing Telegram outbound edit/send flows in `TelegramPoller` +- Existing Feishu outbound text flow extended with card sending in `FeishuRuntime` +- In-memory callback/token state in `RemoteBindingStore` -## Data Model +## Data And API Changes +- `RemoteConversationSnapshot` + - Add `pendingInteraction: RemotePendingInteraction | null` + - Preserve `text` and `completed` semantics so remote delivery can send visible text plus a follow-up interaction prompt +- `RemoteRunnerStatus` + - Add `pendingInteraction` + - Suppress `isGenerating` while the assistant is explicitly waiting on user action - `RemotePendingInteraction` - - `type` - - `messageId` - - `toolCallId` - - `toolName` - - `toolArgs` - - optional permission metadata - - optional question metadata -- `TelegramPendingInteractionState` - - `endpointKey` - - `messageId` - - `toolCallId` - - `createdAt` -- `FeishuOutboundAction` - - `sendText` - - `sendCard` + - Include `messageId`, `toolCallId`, `toolName`, `toolArgs` + - Include permission metadata for `tool_call_permission` + - Include question metadata for `question_request` +- `RemoteCommandRouteResult` / `FeishuCommandRouteResult` + - Allow outbound interaction prompt actions in addition to normal replies/conversation execution + +## Telegram Rendering Behavior + +- Permission interactions render a dedicated prompt with inline `Allow` / `Deny` buttons. +- Single-choice questions render inline option buttons and `Other` when custom text is allowed. 
+- `question.multiple === true` does not render fake multi-select buttons and instead instructs the user to reply in plain text. +- Text fallback accepts: + - `ALLOW` / `DENY` for permissions + - Exact numeric replies for question options + - Exact option labels for question options + - Custom text when allowed +- Expired callback tokens do not hard-fail if the interaction still exists; the router re-reads the current pending interaction and refreshes the prompt. +- After a button press, Telegram edits the original prompt into a resolved state immediately, then continues any deferred execution in the background. + +## Feishu Rendering Behavior + +- Pending interactions render as interactive-card style outbound messages when the card API succeeds. +- Card fallback uses the full plain-text prompt, not only a short reply hint, so the user still sees permission/question details. +- Feishu remains text-response only on the inbound side: + - `ALLOW` / `DENY` for permissions + - Exact numeric replies for question options + - Exact option labels for question options + - Custom text when allowed +- `question.multiple === true` always uses plain-text answers. + +## Command Gating While Waiting + +- Blocked commands while a pending interaction exists: + - `/new` + - `/use` + - `/model` + - Unrelated plain-text new-turn input +- Allowed commands while a pending interaction exists: + - `/help` + - `/status` + - `/open` + - `/pending` +- `/pending` re-sends the current prompt for the endpoint-bound session. + +## Migration And Compatibility + +- `RemoteConversationSnapshot.pendingInteraction` is additive and does not require a persisted config migration. +- Existing Telegram and Feishu bindings remain valid. +- Existing remote sessions continue to use detached session creation and the same runner/session binding path. +- Telegram keeps inline-button interaction handling; Feishu does not introduce public callback endpoints. 
+- The former generic "Desktop confirmation is required" message becomes a fallback path only, not the primary remote behavior. ## Risks And Mitigations -- Stale callback tokens: rebind tokens to the current endpoint/message/tool call and refresh prompts when the interaction still exists. -- Session drift while waiting: block session-switching commands until the current interaction is resolved. -- Card delivery instability in Feishu: fall back to plain text and keep parsing on inbound text only. +- Stale callback tokens + - Mitigation: rebind tokens to `endpointKey + messageId + toolCallId` and refresh prompts when the current interaction still matches. +- Session drift while waiting + - Mitigation: block `/new`, `/use`, `/model`, and unrelated plain-text turns until the interaction is resolved. +- Feishu card delivery failures + - Mitigation: fall back to the full plain-text prompt and keep inbound parsing text-only. +- Telegram callback latency + - Mitigation: edit the prompt immediately and run continuation work off the poll loop. ## Test Strategy -- Runner tests for extracting pending interactions, responding to them, and continuing chained execution. -- Telegram router tests for button/text approval flows, `/pending`, and expired callback refresh. -- Telegram poller tests for sending prompt messages after a completed assistant response with `pendingInteraction`. -- Feishu router tests for permission/question text parsing and `/pending` card prompts. -- Feishu runtime tests for card delivery and card-to-text fallback. -- Binding-store tests for pending interaction token lifecycle. 
+- Runner tests + - Extract `pendingInteraction` from assistant action blocks + - Resume after tool interaction response + - Handle chained interactions on the same assistant message +- Telegram tests + - Button callbacks and text fallback + - Expired callback token refresh + - `/pending` prompt resend + - Prompt edit timing and non-blocking deferred continuation +- Feishu tests + - Card prompt generation + - Plain-text fallback content + - Text parsing for permission/question answers + - Pending command gating and `/pending` diff --git a/docs/specs/remote-tool-interactions/tasks.md b/docs/specs/remote-tool-interactions/tasks.md new file mode 100644 index 000000000..659613ab4 --- /dev/null +++ b/docs/specs/remote-tool-interactions/tasks.md @@ -0,0 +1,103 @@ +# Remote Tool Interactions Tasks + +## Readiness + +- No open clarification items remain. +- All tasks below map back to the acceptance criteria in [spec.md](./spec.md). + +## T0 Spec Artifacts + +- [x] Create and align `spec.md`, `plan.md`, and `tasks.md` +- Owner: Remote control maintainer +- Estimate: 0.5d +- Acceptance Criteria: + - Spec acceptance criteria for `pendingInteraction`, channel rendering, `/pending`, and command gating are explicitly represented in the plan/tasks artifacts. + - No unresolved clarification markers remain before the work is marked ready. + +## T1 Remote Snapshot And API Changes + +- [x] Extend `RemoteConversationSnapshot` with `pendingInteraction` +- [x] Extend runner status to expose `pendingInteraction` +- [x] Parse assistant `tool_call_permission` and `question_request` blocks into `RemotePendingInteraction` +- Owner: Electron main +- Estimate: 1d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for structured `pendingInteraction`. + - Remote delivery no longer depends on the generic desktop confirmation notice as the primary state. 
+ +## T2 Electron Main Integration + +- [x] Add `RemoteConversationRunner.getPendingInteraction()` +- [x] Add `RemoteConversationRunner.respondToPendingInteraction()` +- [x] Continue polling the same assistant message after tool interaction responses +- Owner: Electron main +- Estimate: 1d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for remote session continuity during paused interactions. + - Chained interactions can surface one at a time without losing the bound session. + +## T3 Telegram Buttons, Callback Handling, And Text Fallback + +- [x] Render permission prompts with `Allow` / `Deny` inline buttons +- [x] Render single-choice question prompts with option buttons and `Other` when custom input is allowed +- [x] Parse `ALLOW` / `DENY`, exact numeric replies, exact labels, and custom text as appropriate +- [x] Edit the original Telegram prompt into a resolved state immediately after button selection +- Owner: Telegram remote +- Estimate: 1.5d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for Telegram permission buttons, single-choice buttons, and text fallback. + - `question.multiple === true` stays plain-text only. + +## T4 Feishu Card Rendering And Full Plain-Text Fallback + +- [x] Render pending interactions as Feishu card-style outbound actions +- [x] Fall back to the complete plain-text prompt when card delivery fails +- [x] Parse `ALLOW` / `DENY`, exact numeric replies, exact labels, and custom text as appropriate +- Owner: Feishu remote +- Estimate: 1d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for Feishu card rendering and fallback behavior. + - Card failure still preserves permission/question details in the fallback message. 
+ +## T5 Token Refresh And Expired Callback Recovery + +- [x] Store Telegram pending interaction callback tokens in `RemoteBindingStore` +- [x] Refresh the pending prompt when an expired callback token is used and the interaction still exists +- Owner: Telegram remote +- Estimate: 0.5d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for expired Telegram callback token refresh. + - Prompt refresh only succeeds when `endpointKey`, `messageId`, and `toolCallId` still match. + +## T6 Pending Prompt Re-Send And Command Gating + +- [x] Add `/pending` for Telegram and Feishu +- [x] Block `/new`, `/use`, `/model`, and unrelated plain-text turns while waiting +- [x] Keep `/help`, `/status`, `/open`, and `/pending` available while waiting +- Owner: Remote router +- Estimate: 0.5d +- Acceptance Criteria: + - Satisfies spec acceptance criteria for `/pending`. + - Satisfies spec acceptance criteria for blocked and allowed commands while waiting. + +## T7 Tests + +- [x] Add runner tests for extraction and follow-up execution +- [x] Add Telegram tests for callback handling, `/pending`, prompt refresh, and non-blocking continuation +- [x] Add Feishu tests for text parsing and fallback behavior +- [x] Add binding/token lifecycle tests +- Owner: QA + Electron main +- Estimate: 1d +- Acceptance Criteria: + - Test coverage maps to the acceptance criteria in `spec.md`. + - Regressions in pairing, binding, `/open`, `/status`, and normal non-interaction flows are covered by targeted tests. + +## T8 Documentation And Review Notes + +- [x] Document compatibility, rollout behavior, and command gating in `plan.md` +- [x] Keep the feature scope explicit: Telegram buttons, Feishu cards, no Feishu callback endpoint +- Owner: Remote control maintainer +- Estimate: 0.5d +- Acceptance Criteria: + - Reviewers can understand rollout steps, dependencies, and compatibility notes without reading implementation files. 
+ - The blocked commands list and allowed commands list match the implemented router behavior and `spec.md`. diff --git a/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts b/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts index f340507d9..cf8af7712 100644 --- a/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts +++ b/src/main/presenter/remoteControlPresenter/services/feishuCommandRouter.ts @@ -9,7 +9,10 @@ import type { } from '../types' import { FEISHU_REMOTE_COMMANDS, buildFeishuBindingMeta, buildFeishuEndpointKey } from '../types' import type { RemoteConversationExecution } from './remoteConversationRunner' -import { buildFeishuPendingInteractionCard } from '../feishu/feishuInteractionPrompt' +import { + buildFeishuPendingInteractionCard, + buildFeishuPendingInteractionText +} from '../feishu/feishuInteractionPrompt' import { FeishuAuthGuard } from './feishuAuthGuard' import { RemoteBindingStore } from './remoteBindingStore' import { RemoteConversationRunner } from './remoteConversationRunner' @@ -305,7 +308,7 @@ export class FeishuCommandRouter { { type: 'sendCard', card: buildFeishuPendingInteractionCard(interaction), - fallbackText: this.formatPendingTextReplyHint(interaction) + fallbackText: buildFeishuPendingInteractionText(interaction) } ] } @@ -337,15 +340,13 @@ export class FeishuCommandRouter { } if (!question.multiple) { - const optionIndex = Number.parseInt(normalized, 10) - if ( - Number.isInteger(optionIndex) && - optionIndex > 0 && - optionIndex <= question.options.length - ) { - return { - kind: 'question_option', - optionLabel: question.options[optionIndex - 1].label + if (/^\d+$/.test(normalized)) { + const optionIndex = Number.parseInt(normalized, 10) + if (optionIndex > 0 && optionIndex <= question.options.length) { + return { + kind: 'question_option', + optionLabel: question.options[optionIndex - 1].label + } } } diff --git 
a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts index 91a49a11e..746287ea8 100644 --- a/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts +++ b/src/main/presenter/remoteControlPresenter/telegram/telegramPoller.ts @@ -58,6 +58,8 @@ export class TelegramPoller { private stopRequested = false private loopPromise: Promise<void> | null = null private activePollController: AbortController | null = null + private runId = 0 + private readonly backgroundTasks = new Set<Promise<void>>() private statusSnapshot: TelegramPollerStatusSnapshot = { state: 'stopped', lastError: null, @@ -72,9 +74,10 @@ export class TelegramPoller { } this.stopRequested = false - this.loopPromise = this.runLoop().finally(() => { + const runId = ++this.runId + this.loopPromise = this.runLoop(runId).finally(() => { this.loopPromise = null - if (!this.stopRequested && this.statusSnapshot.state !== 'error') { + if (this.isCurrentRun(runId) && this.statusSnapshot.state !== 'error') { this.setStatus({ state: 'stopped' }) @@ -84,11 +87,13 @@ export class TelegramPoller { async stop(): Promise<void> { this.stopRequested = true + this.runId += 1 this.activePollController?.abort() const loop = this.loopPromise if (loop) { await loop } + this.backgroundTasks.clear() this.setStatus({ state: 'stopped' }) @@ -98,10 +103,10 @@ export class TelegramPoller { return { ...this.statusSnapshot } } - private async runLoop(): Promise<void> { + private async runLoop(runId: number): Promise<void> { let backoffIndex = 0 - while (!this.stopRequested) { + while (this.isCurrentRun(runId)) { const pollSignal = this.createPollSignal() let updates: TelegramRawUpdate[] @@ -122,7 +127,7 @@ export class TelegramPoller { backoffIndex = 0 } catch (error) { - if (this.stopRequested) { + if (!this.isCurrentRun(runId)) { return } @@ -147,7 +152,7 @@ export class TelegramPoller { } for (const update of updates) { - if (this.stopRequested) { + if
(!this.isCurrentRun(runId)) { return } @@ -156,9 +161,9 @@ export class TelegramPoller { this.deps.bindingStore.setPollOffset(update.update_id + 1) try { - await this.handleRawUpdate(update) + await this.handleRawUpdate(update, runId) } catch (error) { - if (this.stopRequested) { + if (!this.isCurrentRun(runId)) { return } @@ -188,7 +193,7 @@ export class TelegramPoller { }) } - private async handleRawUpdate(update: TelegramRawUpdate): Promise { + private async handleRawUpdate(update: TelegramRawUpdate, runId: number): Promise { const parsed = this.deps.parser.parseUpdate(update) if (!parsed) { return @@ -220,27 +225,62 @@ export class TelegramPoller { await this.dispatchRouteResult( target, routed, + runId, parsed.kind === 'message' && !parsed.command ? parsed : null ) if (routed.deferred) { - const deferred = await routed.deferred - await this.dispatchRouteResult(target, deferred) + this.scheduleDeferredRouteResult(target, routed.deferred, runId) } } + private scheduleDeferredRouteResult( + target: TelegramTransportTarget, + deferred: Promise, + runId: number + ): void { + const task = Promise.resolve() + .then(async () => { + const continuation = await deferred + if (!this.isCurrentRun(runId)) { + return + } + + await this.dispatchRouteResult(target, continuation, runId) + }) + .catch((error) => { + console.warn('[TelegramPoller] Deferred route dispatch failed:', error) + }) + .finally(() => { + this.backgroundTasks.delete(task) + }) + + this.backgroundTasks.add(task) + } + private async dispatchRouteResult( target: TelegramTransportTarget, routed: | Pick | RemoteCommandRouteContinuation, - reactionMessage?: TelegramInboundMessage | null + runId: number, + reactionMessage: TelegramInboundMessage | null = null ): Promise { + if (!this.isCurrentRun(runId)) { + return + } + for (const reply of routed.replies ?? 
[]) { + if (!this.isCurrentRun(runId)) { + return + } await this.sendChunkedMessage(target, reply) } if (routed.outboundActions?.length) { + if (!this.isCurrentRun(runId)) { + return + } await this.dispatchOutboundActions(target, routed.outboundActions) } @@ -253,7 +293,7 @@ export class TelegramPoller { } try { - await this.deliverConversation(target, routed.conversation) + await this.deliverConversation(target, routed.conversation, runId) } finally { if (reactionMessage) { await this.clearIncomingReaction(reactionMessage.chatId, reactionMessage.messageId) @@ -265,19 +305,20 @@ export class TelegramPoller { target: TelegramTransportTarget, execution: NonNullable< Awaited>['conversation'] - > + >, + runId: number ): Promise { const streamMode = this.deps.bindingStore.getTelegramConfig().streamMode if (streamMode === 'final') { - await this.deliverFinalConversation(target, execution) + await this.deliverFinalConversation(target, execution, runId) return } try { - await this.deliverDraftConversation(target, execution) + await this.deliverDraftConversation(target, execution, runId) } catch (error) { console.warn('[TelegramPoller] Draft streaming failed, falling back to final mode:', error) - await this.deliverFinalConversation(target, execution) + await this.deliverFinalConversation(target, execution, runId) } } @@ -285,14 +326,15 @@ export class TelegramPoller { target: TelegramTransportTarget, execution: NonNullable< Awaited>['conversation'] - > + >, + runId: number ): Promise { const draftId = createTelegramDraftId() const startedAt = Date.now() let typingSent = false let lastDraftText = '' - while (!this.stopRequested) { + while (this.isCurrentRun(runId)) { const snapshot = await execution.getSnapshot() if (snapshot.completed) { if (snapshot.text.trim()) { @@ -321,12 +363,13 @@ export class TelegramPoller { target: TelegramTransportTarget, execution: NonNullable< Awaited>['conversation'] - > + >, + runId: number ): Promise { const startedAt = Date.now() let 
typingSent = false - while (!this.stopRequested) { + while (this.isCurrentRun(runId)) { const snapshot = await execution.getSnapshot() if (snapshot.completed) { if (snapshot.text.trim()) { @@ -518,6 +561,10 @@ export class TelegramPoller { return error.message.includes('terminated by other getUpdates request') } + private isCurrentRun(runId: number): boolean { + return this.runId === runId && !this.stopRequested + } + private setStatus( patch: Partial & { state?: TelegramPollerStatusSnapshot['state'] diff --git a/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts b/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts index a4a7db1f4..71afdf6c8 100644 --- a/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts +++ b/test/main/presenter/remoteControlPresenter/feishuCommandRouter.test.ts @@ -246,7 +246,7 @@ describe('FeishuCommandRouter', () => { expect.arrayContaining([ expect.objectContaining({ type: 'sendCard', - fallbackText: expect.stringContaining('option number'), + fallbackText: expect.stringContaining('Pick one'), card: expect.objectContaining({ header: expect.objectContaining({ title: expect.objectContaining({ @@ -304,4 +304,50 @@ describe('FeishuCommandRouter', () => { }) expect(result.replies).toEqual(['Selected: Beta']) }) + + it('treats prefixed numeric text as custom input instead of an option', async () => { + const runner = { + getPendingInteraction: vi.fn().mockResolvedValue({ + type: 'question', + messageId: 'assistant-4', + toolCallId: 'tool-4', + toolName: 'ask_user', + toolArgs: '{}', + question: { + question: 'Pick one', + options: [{ label: 'Alpha' }, { label: 'Beta' }], + custom: true, + multiple: false + } + }), + respondToPendingInteraction: vi.fn().mockResolvedValue({ + waitingForUserMessage: false, + execution: null + }) + } + const router = new FeishuCommandRouter({ + authGuard: { + ensureAuthorized: vi.fn().mockReturnValue({ + ok: true, + userOpenId: 'ou_123' + }), + pair: vi.fn() + 
} as any, + runner: runner as any, + bindingStore: createBindingStore() as any, + getRuntimeStatus: vi.fn() + }) + + const result = await router.handleMessage( + createMessage({ + text: '2 please' + }) + ) + + expect(runner.respondToPendingInteraction).toHaveBeenCalledWith('feishu:oc_100:root', { + kind: 'question_custom', + answerText: '2 please' + }) + expect(result.replies).toEqual(['Answer received: 2 please']) + }) }) diff --git a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts index 5f895e95d..4ed46adb1 100644 --- a/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts +++ b/test/main/presenter/remoteControlPresenter/telegramPoller.test.ts @@ -957,4 +957,95 @@ describe('TelegramPoller', () => { await poller.stop() }) + + it('stops without waiting for unresolved deferred route continuations', async () => { + const client = createClient() + const deferred = createDeferred<{ + conversation?: { + sessionId: string + eventId: string + getSnapshot: () => Promise<{ + messageId: string | null + text: string + completed: boolean + pendingInteraction: null + }> + } + }>() + client.getUpdates + .mockResolvedValueOnce([ + { + update_id: 2, + callback_query: { + id: 'callback-1', + from: { + id: 123 + }, + data: 'pending:token:allow', + message: { + message_id: 30, + chat: { + id: 100, + type: 'private' + } + } + } + } + ]) + .mockImplementation(createBlockingUpdates()) + + const poller = new TelegramPoller({ + client: client as any, + parser: { + parseUpdate: vi.fn().mockReturnValue({ + kind: 'callback_query', + updateId: 2, + chatId: 100, + messageThreadId: 0, + messageId: 30, + chatType: 'private', + fromId: 123, + callbackQueryId: 'callback-1', + data: 'pending:token:allow' + }) + } as any, + router: { + handleMessage: vi.fn().mockResolvedValue({ + replies: [], + outboundActions: [ + { + type: 'editMessageText', + messageId: 30, + text: 'Permission handled.\nApproved. 
Continuing...', + replyMarkup: null + } + ], + callbackAnswer: { + text: 'Continuing...' + }, + deferred: deferred.promise + }) + } as any, + bindingStore: { + getPollOffset: vi.fn().mockReturnValue(0), + setPollOffset: vi.fn(), + getTelegramConfig: vi.fn().mockReturnValue({ + streamMode: 'draft' + }) + } as any + }) + + await poller.start() + + await vi.waitFor(() => { + expect(client.editMessageText).toHaveBeenCalled() + }) + + await expect( + Promise.race([ + poller.stop().then(() => 'stopped'), + new Promise<'timeout'>((resolve) => setTimeout(() => resolve('timeout'), 50)) + ]) + ).resolves.toBe('stopped') + }) })