ThinkInAIXYZ · zerob13 · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026
diff --git a/docs/specs/remote-tool-interactions/plan.md b/docs/specs/remote-tool-interactions/plan.md
@@ -0,0 +1,125 @@
+# Remote Tool Interactions Plan
+
+## Summary
+
+Implement a structured remote interaction loop for Telegram and Feishu so remote endpoints can resolve paused permission and question interactions without falling back to a generic desktop-only notice. The feature stays inside Electron main and reuses the existing `RemoteConversationRunner`, `RemoteCommandRouter`, `FeishuCommandRouter`, and `newAgentPresenter.respondToolInteraction(...)` flow.
+
+## Goals
+
+- Expose `RemoteConversationSnapshot.pendingInteraction` as the canonical paused-interaction state for remote delivery.
+- Preserve the current detached-session and bound-endpoint model without adding renderer IPC.
+- Let Telegram resolve interactions with inline buttons plus text fallback.
+- Let Feishu render interaction cards and fall back to complete plain-text prompts when card delivery fails.
+- Keep command/session state safe while an interaction is unresolved.
+
+## Readiness
+
+- No open clarification items remain.
+- The feature is ready for implementation and regression verification.
+
+## Rollout Steps
+
+1. Extend remote snapshot and runner contracts to surface `pendingInteraction`.
+2. Parse assistant `tool_call_permission` and `question_request` blocks into a shared `RemotePendingInteraction` model.
+3. Gate remote command routing around pending interactions and add `/pending`.
+4. Add Telegram-specific rendering, callback token state, callback refresh, and text fallback.
+5. Add Feishu-specific card rendering, text fallback, and inbound text parsing.
+6. Add regression coverage for runner extraction, callback refresh, prompt resend, and channel-specific prompt delivery.
+7. Update spec artifacts so acceptance, rollout, and compatibility are reviewable without tracing code.
+
+## Dependencies
+
+- `RemoteConversationSnapshot.pendingInteraction` in `RemoteConversationRunner`
+- `newAgentPresenter.respondToolInteraction(...)`
+- Existing Telegram outbound edit/send flows in `TelegramPoller`
+- Existing Feishu outbound text flow extended with card sending in `FeishuRuntime`
+- In-memory callback/token state in `RemoteBindingStore`
+
+## Data And API Changes
+
+- `RemoteConversationSnapshot`
+  - Add `pendingInteraction: RemotePendingInteraction | null`
+  - Preserve `text` and `completed` semantics so remote delivery can send visible text plus a follow-up interaction prompt
+- `RemoteRunnerStatus`
+  - Add `pendingInteraction`
+  - Suppress `isGenerating` while the assistant is explicitly waiting on user action
+- `RemotePendingInteraction`
+  - Include `messageId`, `toolCallId`, `toolName`, `toolArgs`
+  - Include permission metadata for `tool_call_permission`
+  - Include question metadata for `question_request`
+- `RemoteCommandRouteResult` / `FeishuCommandRouteResult`
+  - Allow outbound interaction prompt actions in addition to normal replies/conversation execution
+
+## Telegram Rendering Behavior
+
+- Permission interactions render a dedicated prompt with inline `Allow` / `Deny` buttons.
+- Single-choice questions render inline option buttons and `Other` when custom text is allowed.
+- When `question.multiple === true`, Telegram does not render fake multi-select buttons and instead instructs the user to reply in plain text.
+- Text fallback accepts:
+  - `ALLOW` / `DENY` for permissions
+  - Exact numeric replies for question options
+  - Exact option labels for question options
+  - Custom text when allowed
+- Expired callback tokens do not hard-fail if the interaction still exists; the router re-reads the current pending interaction and refreshes the prompt.
+- After a button press, Telegram edits the original prompt into a resolved state immediately, then continues any deferred execution in the background.
+
+## Feishu Rendering Behavior
+
+- Pending interactions render as interactive-card style outbound messages when the card API succeeds.
+- Card fallback uses the full plain-text prompt, not only a short reply hint, so the user still sees permission/question details.
+- Feishu remains text-response only on the inbound side:
+  - `ALLOW` / `DENY` for permissions
+  - Exact numeric replies for question options
+  - Exact option labels for question options
+  - Custom text when allowed
+- `question.multiple === true` always uses plain-text answers.
+
+## Command Gating While Waiting
+
+- Blocked commands while a pending interaction exists:
+  - `/new`
+  - `/use`
+  - `/model`
+  - Unrelated plain-text new-turn input
+- Allowed commands while a pending interaction exists:
+  - `/help`
+  - `/status`
+  - `/open`
+  - `/pending`
+- `/pending` re-sends the current prompt for the endpoint-bound session.
+
+## Migration And Compatibility
+
+- `RemoteConversationSnapshot.pendingInteraction` is additive and does not require a persisted config migration.
+- Existing Telegram and Feishu bindings remain valid.
+- Existing remote sessions continue to use detached session creation and the same runner/session binding path.
+- Telegram keeps inline-button interaction handling; Feishu does not introduce public callback endpoints.
+- The former generic "Desktop confirmation is required" message becomes a fallback path only, not the primary remote behavior.
+
+## Risks And Mitigations
+
+- Stale callback tokens
+  - Mitigation: rebind tokens to `endpointKey + messageId + toolCallId` and refresh prompts when the current interaction still matches.
+- Session drift while waiting
+  - Mitigation: block `/new`, `/use`, `/model`, and unrelated plain-text turns until the interaction is resolved.
+- Feishu card delivery failures
+  - Mitigation: fall back to the full plain-text prompt and keep inbound parsing text-only.
+- Telegram callback latency
+  - Mitigation: edit the prompt immediately and run continuation work off the poll loop.
+
+## Test Strategy
+
+- Runner tests
+  - Extract `pendingInteraction` from assistant action blocks
+  - Resume after tool interaction response
+  - Handle chained interactions on the same assistant message
+- Telegram tests
+  - Button callbacks and text fallback
+  - Expired callback token refresh
+  - `/pending` prompt resend
+  - Prompt edit timing and non-blocking deferred continuation
+- Feishu tests
+  - Card prompt generation
+  - Plain-text fallback content
+  - Text parsing for permission/question answers
+  - Pending command gating and `/pending`
diff --git a/docs/specs/remote-tool-interactions/spec.md b/docs/specs/remote-tool-interactions/spec.md
@@ -0,0 +1,47 @@
+# Remote Tool Interactions
+
+## Summary
+
+Extend remote control so Telegram and Feishu can surface structured pending tool interactions instead of collapsing them into a generic desktop-only notice. Remote users must be able to resolve permission requests and `user ask` style questions from the chat channel itself, while the desktop app keeps the existing agent execution and permission backends.
+
+## User Stories
+
+- As a Telegram remote user, I can approve or deny a tool permission request directly from inline buttons.
+- As a Telegram remote user, I can answer a pending question by tapping an option or replying with text when custom input is allowed.
+- As a Feishu remote user, I can see a clear card-style prompt for a pending permission or question and reply with a supported text answer.
+- As a desktop user, I do not lose remote session continuity when a tool interaction pauses the assistant.
+- As a paired remote user, I can ask the bot to re-show the current pending interaction without opening the desktop app.
+
+## Acceptance Criteria
+
+- `RemoteConversationSnapshot` includes `pendingInteraction` with structured `permission` or `question` data when the latest assistant message is waiting on user action.
+- Remote delivery no longer relies on the generic "Desktop confirmation is required" path as the primary behavior.
+- Telegram pending permission prompts render inline `Allow` and `Deny` buttons and also accept `ALLOW` / `DENY` text replies.
+- Telegram single-choice question prompts render inline option buttons and an `Other` button when custom answers are allowed.
+- Telegram multi-answer questions do not render fake multi-select buttons and instruct the user to reply with plain text.
+- Expired Telegram interaction callback tokens refresh the prompt when the underlying pending interaction still exists.
+- Feishu pending prompts render as interactive-card style outbound messages when possible and fall back to plain text when card delivery fails.
+- Feishu accepts `ALLOW` / `DENY`, option numbers, exact option labels, and custom text according to the pending question shape.
+- `/pending` re-sends the current prompt for both Telegram and Feishu.
+- While a pending interaction exists, `/new`, `/use`, `/model`, and plain new-turn messages are blocked so that they cannot cause unrelated session state changes.
+- `/help`, `/status`, `/open`, and `/pending` remain available while a pending interaction exists.
+- Existing remote pairing, binding, `/open`, `/status`, and normal non-interaction conversations continue to work.
+
+## Constraints
+
+- Keep all logic in Electron main; do not add a new renderer IPC surface for this feature.
+- Telegram continues to use callback-query buttons; Feishu does not introduce a public HTTP callback service for card clicks.
+- Remote bot copy remains English in this increment.
+- Each endpoint resolves only one pending interaction at a time: the first one pending on its bound session.
+
+## Non-Goals
+
+- Feishu clickable approval callbacks.
+- Locale negotiation for remote bot messages.
+- Arbitrary rich remote workflows beyond permission requests and question requests.
+
+## Compatibility
+
+- Existing Telegram and Feishu bindings remain valid.
+- Existing remote sessions continue to use `RemoteConversationRunner` and detached session creation.
+- Structured pending interaction handling is additive and only changes how remote channels render and answer paused assistant states.
diff --git a/docs/specs/remote-tool-interactions/tasks.md b/docs/specs/remote-tool-interactions/tasks.md
@@ -0,0 +1,103 @@
+# Remote Tool Interactions Tasks
+
+## Readiness
+
+- No open clarification items remain.
+- All tasks below map back to the acceptance criteria in [spec.md](./spec.md).
+
+## T0 Spec Artifacts
+
+- [x] Create and align `spec.md`, `plan.md`, and `tasks.md`
+- Owner: Remote control maintainer
+- Estimate: 0.5d
+- Acceptance Criteria:
+  - Spec acceptance criteria for `pendingInteraction`, channel rendering, `/pending`, and command gating are explicitly represented in the plan/tasks artifacts.
+  - No unresolved clarification markers remain before the work is marked ready.
+
+## T1 Remote Snapshot And API Changes
+
+- [x] Extend `RemoteConversationSnapshot` with `pendingInteraction`
+- [x] Extend runner status to expose `pendingInteraction`
+- [x] Parse assistant `tool_call_permission` and `question_request` blocks into `RemotePendingInteraction`
+- Owner: Electron main
+- Estimate: 1d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for structured `pendingInteraction`.
+  - Remote delivery no longer depends on the generic desktop confirmation notice as the primary state.
+
+## T2 Electron Main Integration
+
+- [x] Add `RemoteConversationRunner.getPendingInteraction()`
+- [x] Add `RemoteConversationRunner.respondToPendingInteraction()`
+- [x] Continue polling the same assistant message after tool interaction responses
+- Owner: Electron main
+- Estimate: 1d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for remote session continuity during paused interactions.
+  - Chained interactions can surface one at a time without losing the bound session.
+
+## T3 Telegram Buttons, Callback Handling, And Text Fallback
+
+- [x] Render permission prompts with `Allow` / `Deny` inline buttons
+- [x] Render single-choice question prompts with option buttons and `Other` when custom input is allowed
+- [x] Parse `ALLOW` / `DENY`, exact numeric replies, exact labels, and custom text as appropriate
+- [x] Edit the original Telegram prompt into a resolved state immediately after button selection
+- Owner: Telegram remote
+- Estimate: 1.5d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for Telegram permission buttons, single-choice buttons, and text fallback.
+  - `question.multiple === true` stays plain-text only.
+
+## T4 Feishu Card Rendering And Full Plain-Text Fallback
+
+- [x] Render pending interactions as Feishu card-style outbound actions
+- [x] Fall back to the complete plain-text prompt when card delivery fails
+- [x] Parse `ALLOW` / `DENY`, exact numeric replies, exact labels, and custom text as appropriate
+- Owner: Feishu remote
+- Estimate: 1d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for Feishu card rendering and fallback behavior.
+  - Card failure still preserves permission/question details in the fallback message.
+
+## T5 Token Refresh And Expired Callback Recovery
+
+- [x] Store Telegram pending interaction callback tokens in `RemoteBindingStore`
+- [x] Refresh the pending prompt when an expired callback token is used and the interaction still exists
+- Owner: Telegram remote
+- Estimate: 0.5d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for expired Telegram callback token refresh.
+  - Prompt refresh only succeeds when `endpointKey`, `messageId`, and `toolCallId` still match.
+
+## T6 Pending Prompt Re-Send And Command Gating
+
+- [x] Add `/pending` for Telegram and Feishu
+- [x] Block `/new`, `/use`, `/model`, and unrelated plain-text turns while waiting
+- [x] Keep `/help`, `/status`, `/open`, and `/pending` available while waiting
+- Owner: Remote router
+- Estimate: 0.5d
+- Acceptance Criteria:
+  - Satisfies spec acceptance criteria for `/pending`.
+  - Satisfies spec acceptance criteria for blocked and allowed commands while waiting.
+
+## T7 Tests
+
+- [x] Add runner tests for extraction and follow-up execution
+- [x] Add Telegram tests for callback handling, `/pending`, prompt refresh, and non-blocking continuation
+- [x] Add Feishu tests for text parsing and fallback behavior
+- [x] Add binding/token lifecycle tests
+- Owner: QA + Electron main
+- Estimate: 1d
+- Acceptance Criteria:
+  - Test coverage maps to the acceptance criteria in `spec.md`.
+  - Regressions in pairing, binding, `/open`, `/status`, and normal non-interaction flows are covered by targeted tests.
+
+## T8 Documentation And Review Notes
+
+- [x] Document compatibility, rollout behavior, and command gating in `plan.md`
+- [x] Keep the feature scope explicit: Telegram buttons, Feishu cards, no Feishu callback endpoint
+- Owner: Remote control maintainer
+- Estimate: 0.5d
+- Acceptance Criteria:
+  - Reviewers can understand rollout steps, dependencies, and compatibility notes without reading implementation files.
+  - The blocked commands list and allowed commands list match the implemented router behavior and `spec.md`.
diff --git a/package.json b/package.json
@@ -73,7 +73,7 @@
     "@larksuiteoapi/node-sdk": "^1.60.0",
     "@modelcontextprotocol/sdk": "^1.28.0",
     "axios": "^1.13.6",
-    "better-sqlite3-multiple-ciphers": "12.4.1",
+    "better-sqlite3-multiple-ciphers": "12.8.0",
     "cheerio": "^1.2.0",
     "chokidar": "^5.0.0",
     "compare-versions": "^6.1.1",
@@ -148,7 +148,7 @@
     "clsx": "^2.1.1",
     "cross-env": "^10.1.0",
     "dayjs": "^1.11.19",
-    "electron": "^37.10.3",
+    "electron": "^39.8.5",
     "electron-builder": "26.0.12",
     "electron-vite": "^4.0.1",
     "jsdom": "^26.1.0",

diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts
@@ -1,6 +1,6 @@
 import * as Lark from '@larksuiteoapi/node-sdk'
 import type { EventHandles } from '@larksuiteoapi/node-sdk'
-import type { FeishuTransportTarget } from '../types'
+import type { FeishuInteractiveCardPayload, FeishuTransportTarget } from '../types'
 
 const FEISHU_OUTBOUND_TEXT_LIMIT = 8_000
 
@@ -18,6 +18,8 @@ const createTextPayload = (text: string): string =>
     text
   })
 
+/** Serializes an interactive card payload to the JSON string used as the message `content`. */
+const createCardPayload = (card: FeishuInteractiveCardPayload): string => {
+  return JSON.stringify(card)
+}
+
 const chunkFeishuText = (text: string): string[] => {
   const normalized = text.trim() || '(No text output)'
   if (normalized.length <= FEISHU_OUTBOUND_TEXT_LIMIT) {
@@ -157,4 +159,32 @@ export class FeishuClient {
       })
     }
   }
+
+  /**
+   * Sends an interactive card message through the Feishu SDK.
+   *
+   * When `target.replyToMessageId` is truthy the card is posted as a reply to
+   * that message, with `reply_in_thread` set from the truthiness of
+   * `target.threadId`. Otherwise a new message is created for `target.chatId`.
+   *
+   * @param target - Delivery target; `replyToMessageId`, `threadId`, and `chatId` are read.
+   * @param card - Card payload, JSON-serialized into the message `content`.
+   * @returns Resolves once the awaited SDK call completes.
+   * @throws Any rejection from the SDK call propagates to the caller (nothing is caught here).
+   */
+  async sendCard(target: FeishuTransportTarget, card: FeishuInteractiveCardPayload): Promise<void> {
+    const serializedCard = createCardPayload(card)
+
+    // No message to reply to: create a fresh message addressed by chat id.
+    if (!target.replyToMessageId) {
+      await this.sdk.im.message.create({
+        params: {
+          receive_id_type: 'chat_id'
+        },
+        data: {
+          receive_id: target.chatId,
+          msg_type: 'interactive',
+          content: serializedCard
+        }
+      })
+      return
+    }
+
+    // Reply path: anchor the card to the referenced message.
+    await this.sdk.im.message.reply({
+      path: {
+        message_id: target.replyToMessageId
+      },
+      data: {
+        content: serializedCard,
+        msg_type: 'interactive',
+        reply_in_thread: Boolean(target.threadId)
+      }
+    })
+  }
 }