diff --git a/CHANGELOG.md b/CHANGELOG.md index 1854d3c6..5fc3e165 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## v3.6.3 合并转发快照、附件 UID 分析与 Release 下载说明 + +本版本聚焦合并转发在协议端不可二次读取时的可用性:收到消息时先把可访问的转发树按会话保存为本地快照,后续工具读取优先复用快照,避免内层转发过期或回源失败导致 AI 看不到内容。同时补齐文件分析 Agent 对内部附件 UID 的直接解析能力,并澄清 Release 产物的下载选择。 + +- 新增合并转发快照缓存。消息预处理阶段会递归保存当前可访问的合并转发树到 `data/cache/forward_snapshots/`,保留实时上下文中的 `` 语义;快照按会话隔离,并合并同一转发的并发抓取。 +- 改进 `messages.get_forward_msg`。工具支持 `forward_` UID 与原始 ID 优先读取本地快照,缺失时再回源 OneBot 并补写快照;协议端无法二次读取内层转发时,会返回明确诊断和可见原始字段,而不是只报空内容。 +- 修复合并转发当前轮上下文重复。Prompt 构建在剔除当前消息历史副本时优先匹配 `message_id`,避免历史中的递归展开文本与实时 `` 在同一轮重复注入。 +- 增强文件分析附件链路。`file_analysis_agent` 的 `analyze_multimodal` 可直接接收 `pic_` / `file_` UID,按当前会话作用域解析并本地化后再分析;Agent 提示同步明确不要改写或猜测附件 UID。 +- 澄清 Release 下载选择。README 与部署、构建、App 文档补充说明:部署 QQ Bot 通常不需要下载 Release 客户端安装包,Console / Chat 仅作为远程管理或原生聊天的可选组件。 +- 同步 3.6.3 版本号与测试覆盖。Python 包、Console、Chat、Tauri 与 lock 文件版本统一更新;新增合并转发快照、转发读取回退、多模态附件 UID 和当前消息去重回归测试。 + +--- + ## v3.6.2 合并转发 UID、表情包跟进与协调器清理 本版本继续收敛消息附件链路和协调器结构:合并转发可作为会话内 `forward_` UID 被 AI 按层读取和复用,表情包跟进策略更稳定,同时移除旧版协调器兼容模块,并完成 3.6.2 版本号同步。 diff --git a/README.md b/README.md index b7d19cc3..edb5da46 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,32 @@ [点击添加官方实例QQ](https://qm.qq.com/q/cvjJoNysGA) +## Release 下载速查 + +如果只是部署和运行 QQ Bot,通常**不需要**在 GitHub Release 的 Assets 里下载任何文件。推荐按下方[快速开始](#-快速开始-源码模式)使用源码部署;只想快速体验命令行入口时,可使用 `pip install -U Undefined-bot` 或 `uv tool install Undefined-bot`。 + +Release 里的安装包是可选组件,不是 Bot 服务本体: + +| 目标 | 是否需要下载 Release | 选择 | +| --- | --- | --- | +| 部署 / 运行 QQ Bot | 不需要 | 源码部署,启动 `uv run Undefined-webui`;或使用 `pip` / `uv tool` 安装 Python 包 | +| 远程管理已有实例 | 可选 | `Undefined-Console-*`,用于连接 Management API 并打开远程 WebUI | +| 使用原生聊天客户端 | 可选 | `Undefined-Chat-*`,用于连接 Runtime API 聊天 | +| 离线安装 / 镜像缓存 Python 包 | 可选 | `undefined_bot-*.whl` 或 `undefined_bot-*.tar.gz` | + +平台文件选择: + +| 平台 | 推荐下载 | +| --- | --- | +| Windows x64 | `*-windows-x64-setup.exe`;批量部署或系统管理场景可选 `.msi` | +| macOS Apple Silicon | `*-macos-arm64.dmg` | +| macOS Intel | `*-macos-x64.dmg` | +| Debian / Ubuntu | `*.deb` | +| 其他 Linux x64 | `*.AppImage` | +| Android 常见手机 / 平板 | `*-android-arm64-v8a-release.apk`;旧 32 位设备选 `armeabi-v7a`,模拟器按需选 `x86_64` / `x86` | + +Console 和 Chat 都需要连接到已经运行的 Undefined 服务。首次部署请先启动 `Undefined-webui`,完成配置和 Bot 启动后,再按需使用这些客户端连接。 + ## ⚡ 核心特性 - **Skills 架构**:全新设计的技能系统,将基础工具(Tools)与智能代理(Agents)分层管理,支持自动发现与注册。 @@ -126,7 +152,7 @@ uv run Undefined-webui # cp config.toml.example config.toml ``` -> 浏览器是默认入口;如果你下载了 Release 中的桌面端或 Android 安装包,也可以在完成首轮密码设置后,连接到同一个 Management API 地址进行远程管理。 +> 浏览器是默认入口;如果你按上方 [Release 下载速查](#release-下载速查)下载了桌面端或 Android 安装包,也可以在完成首轮密码设置后,连接到同一个 Management API 地址进行远程管理。 --- diff --git a/apps/undefined-chat/package-lock.json b/apps/undefined-chat/package-lock.json index 0f669ad6..7a8047e4 100644 --- a/apps/undefined-chat/package-lock.json +++ b/apps/undefined-chat/package-lock.json @@ -1,12 +1,12 @@ { "name": "undefined-chat", - "version": "3.6.2", + "version": "3.6.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "undefined-chat", - "version": "3.6.2", + "version": "3.6.3", "dependencies": { "@tauri-apps/api": "^2.3.0", "@tauri-apps/plugin-dialog": "^2.7.1", diff --git a/apps/undefined-chat/package.json b/apps/undefined-chat/package.json index 9bc32cae..ffe8de95 100644 --- a/apps/undefined-chat/package.json +++ b/apps/undefined-chat/package.json @@ -1,7 +1,7 @@ { "name": "undefined-chat", "private": true, - "version": "3.6.2", + "version": "3.6.3", "type": "module", "scripts": { "tauri": "tauri", diff --git a/apps/undefined-chat/src-tauri/Cargo.lock b/apps/undefined-chat/src-tauri/Cargo.lock index c3fbf761..1b574853 100644 --- a/apps/undefined-chat/src-tauri/Cargo.lock +++ b/apps/undefined-chat/src-tauri/Cargo.lock @@ -5431,7 +5431,7 @@ dependencies = [ [[package]] name = "undefined_chat" -version = "3.6.2" +version = "3.6.3" dependencies = [ "futures-util", "keyring", diff --git a/apps/undefined-chat/src-tauri/Cargo.toml b/apps/undefined-chat/src-tauri/Cargo.toml index c452e146..dd288106 100644 --- a/apps/undefined-chat/src-tauri/Cargo.toml +++ b/apps/undefined-chat/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "undefined_chat" -version = "3.6.2" +version = "3.6.3" description = "Undefined native chat client" authors = ["Undefined contributors"] license = "MIT" diff --git a/apps/undefined-chat/src-tauri/tauri.conf.json b/apps/undefined-chat/src-tauri/tauri.conf.json index 0b25919a..c4c04889 100644 --- a/apps/undefined-chat/src-tauri/tauri.conf.json +++ b/apps/undefined-chat/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "Undefined Chat", - "version": "3.6.2", + "version": "3.6.3", "identifier": "com.undefined.chat", "build": { "beforeDevCommand": "npm run dev", diff --git a/apps/undefined-console/package-lock.json b/apps/undefined-console/package-lock.json index 60f3e9a7..95c14004 100644 --- a/apps/undefined-console/package-lock.json +++ b/apps/undefined-console/package-lock.json @@ -1,12 +1,12 @@ { "name": "undefined-console", - "version": "3.6.2", + "version": "3.6.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "undefined-console", - "version": "3.6.2", + "version": "3.6.3", "dependencies": { "@tauri-apps/api": "^2.3.0", "@tauri-apps/plugin-http": "^2.3.0" diff --git a/apps/undefined-console/package.json b/apps/undefined-console/package.json index b025051f..660a2fa3 100644 --- a/apps/undefined-console/package.json +++ b/apps/undefined-console/package.json @@ -1,7 +1,7 @@ { "name": "undefined-console", "private": true, - "version": "3.6.2", + "version": "3.6.3", "type": "module", "scripts": { "tauri": "tauri", diff --git a/apps/undefined-console/src-tauri/Cargo.lock b/apps/undefined-console/src-tauri/Cargo.lock index 125ed043..23fff594 100644 --- a/apps/undefined-console/src-tauri/Cargo.lock +++ b/apps/undefined-console/src-tauri/Cargo.lock @@ -4063,7 +4063,7 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "undefined_console" -version = "3.6.2" +version = "3.6.3" dependencies = [ "serde", "serde_json", diff --git a/apps/undefined-console/src-tauri/Cargo.toml b/apps/undefined-console/src-tauri/Cargo.toml index b0787ca2..e373c291 100644 --- a/apps/undefined-console/src-tauri/Cargo.toml +++ b/apps/undefined-console/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "undefined_console" -version = "3.6.2" +version = "3.6.3" description = "Undefined cross-platform management console" authors = ["Undefined contributors"] license = "MIT" diff --git a/apps/undefined-console/src-tauri/tauri.conf.json b/apps/undefined-console/src-tauri/tauri.conf.json index 227c744b..546eabf7 100644 --- a/apps/undefined-console/src-tauri/tauri.conf.json +++ b/apps/undefined-console/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "Undefined Console", - "version": "3.6.2", + "version": "3.6.3", "identifier": "com.undefined.console", "build": { "beforeDevCommand": "npm run dev", diff --git a/docs/app.md b/docs/app.md index 6267e5f2..cfd03a07 100644 --- a/docs/app.md +++ b/docs/app.md @@ -125,6 +125,8 @@ Android 端仍然走同一套连接模型,但 UI 目标是: ## 8. Release 产物 +普通用户如果只是部署 Bot,不需要下载这些 App 产物;先运行 `Undefined-webui` 即可。需要远程管理或原生聊天客户端时,再按 [README — Release 下载速查](../README.md#release-下载速查) 选择对应平台文件。 + 每次 `v*` tag 发布时,Release workflow 计划同步上传: - Python:`wheel` + `sdist` diff --git a/docs/build.md b/docs/build.md index 6eebb9c9..2f7a0148 100644 --- a/docs/build.md +++ b/docs/build.md @@ -351,6 +351,8 @@ npm install ## 9. Release 产物矩阵 +面向普通用户的下载选择见 [README — Release 下载速查](../README.md#release-下载速查);本节只记录构建和发布矩阵。部署 Bot 本身不需要下载 Console / Chat 客户端安装包。 + 每次正式 Release 计划上传: - Python diff --git a/docs/configuration.md b/docs/configuration.md index bef89406..dd2a92fb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -580,7 +580,7 @@ Prompt caching 补充: 外部接收的远程图片或文件默认会先下载到附件缓存再生成 UID,避免后续 URL 失效;大文件超过阈值时,UID 仍会生成,但绑定的是 URL 引用而不是缓存文件,AI 可在上下文中看到原始 `source_ref`。如果本地缓存因总容量或时间清理被删除,但记录仍保留 URL,后续需要文件内容时会优先按 URL 回源下载。 -合并转发会复用同一注册表登记为 `forward_...` UID,并在实时 AI 输入中显示为 ``。历史记录仍保留递归展开后的文本;需要查看实时上下文里的转发内容时,AI 会调用 `messages.get_forward_msg` 按层读取,内层合并转发会继续分配新的 `forward_...` UID。 +合并转发会复用同一注册表登记为 `forward_...` UID,并在实时 AI 输入中显示为 ``。收到合并转发时会在预处理阶段递归保存当前可访问的转发树到 `data/cache/forward_snapshots/`,后续 `messages.get_forward_msg` 读取时优先使用本地快照;缺失时才回源 OneBot 并补写快照。历史记录仍保留递归展开后的文本,但同一轮 prompt 会按 `message_id` 剔除当前消息的历史副本,因此实时上下文只保留 UID;需要查看第一层或内层内容时,AI 会调用工具按层读取,内层合并转发会继续分配新的 `forward_...` UID。如果协议端无法二次读取内层转发,会返回明确诊断和可见原始字段。 ### 4.10.2 `[message_batcher]` 同 sender 短时消息合并 diff --git a/docs/deployment.md b/docs/deployment.md index 6382d7c1..36889c81 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -2,8 +2,10 @@ 提供源码部署与 pip/uv tool 安装两种方式:**源码部署是推荐的首选方式**,功能完整且经过充分测试;pip/uv tool 安装适合快速体验,但部分功能支持尚不完善。 +> **Release 下载提示**:如果目的是部署 QQ Bot,不需要在 GitHub Release 的 Assets 中挑客户端安装包;按本文源码部署或 pip/uv tool 安装即可。Release 中的 `Undefined-Console-*` 和 `Undefined-Chat-*` 是可选客户端,选择说明见 [README — Release 下载速查](../README.md#release-下载速查)。 +> > **作为 Python 库嵌入**:若你不需要启动 QQ Bot CLI,而是要在自己的应用或测试中复用 Undefined 组件(配置、`AIClient`、Skills、认知记忆等),请参阅 [Python 库 API 参考](python-api.md) 与 [配置详解 — 库嵌入配置](configuration.md#2-库嵌入配置)。CLI 入口(`Undefined` / `Undefined-webui`)行为不受库嵌入 API 影响。 - +> > Python 版本要求:`3.11`~`3.13`(包含)。 > > 若使用 `uv`,通常不需要你手动限制系统 Python 版本;`uv` 会根据项目约束自动选择/下载兼容解释器。 diff --git a/docs/pipelines.md b/docs/pipelines.md index 386f3272..399b3c99 100644 --- a/docs/pipelines.md +++ b/docs/pipelines.md @@ -6,8 +6,8 @@ ## 运行顺序 -1. `MessageHandler` 先并行执行消息预处理:附件收集、历史文本解析、昵称或群信息读取等。图片、文件等媒体会登记为附件 UID,并在 AI 可见正文中统一写作 ``;合并转发会登记为 ``,不在实时 AI 输入中自动展开。 -2. 用户消息先写入历史。历史记录仍会递归展开合并转发文本,保持历史检索和旧行为兼容;实时 AI 输入只保留 forward UID,AI 需要查看时调用 `messages.get_forward_msg` 按层读取。 +1. `MessageHandler` 先并行执行消息预处理:附件收集、历史文本解析、昵称或群信息读取等。图片、文件等媒体会登记为附件 UID,并在 AI 可见正文中统一写作 ``;合并转发会登记为 ``,同时递归保存当前可访问的转发树快照,不在实时 AI 输入中自动展开。 +2. 用户消息先写入历史。历史记录仍会递归展开合并转发文本,保持历史检索和旧行为兼容;同一轮 prompt 会剔除当前消息的历史副本,实时 AI 输入只保留 forward UID,AI 需要查看第一层或内层内容时调用 `messages.get_forward_msg` 按层读取。 3. 若消息命中斜杠命令,立即分发命令并结束本轮后续流程;命令输入和命令输出会写入历史,供后续 AI 轮次读取。 4. 未命中命令时,`PipelineRegistry` 并行调用所有已注册管线的 `detect(context)`。 5. 对所有命中的管线,并行调用对应的 `process(detection, context)`。 diff --git a/docs/usage.md b/docs/usage.md index 5fc87cea..3df41c5b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -184,7 +184,7 @@ Undefined 搭载了基于 ChromaDB 向量数据库的后台认知系统,无需 | `messages.send_text_file` | 将文本内容生成文件后发送 | | `messages.send_url_file` | 下载指定 URL 的文件后发送 | | `messages.send_group_sign` | 执行群签到操作 | -| `messages.get_forward_msg` | 按层读取合并转发内容;支持 `` 和旧合并转发 ID,可用 `offset`/`limit` 分页查看更多 | +| `messages.get_forward_msg` | 按层读取合并转发内容;支持 `` 和旧合并转发 ID,优先使用收到消息时递归保存的本地快照,缺失时回源 OneBot,可用 `offset`/`limit` 分页查看更多 | --- diff --git a/pyproject.toml b/pyproject.toml index 3b8062f9..9ca5f7f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "Undefined-bot" -version = "3.6.2" +version = "3.6.3" description = "QQ bot platform with cognitive memory architecture and multi-agent Skills, via OneBot V11." readme = "README.md" authors = [ diff --git a/src/Undefined/__init__.py b/src/Undefined/__init__.py index 6472d018..8a86b5da 100644 --- a/src/Undefined/__init__.py +++ b/src/Undefined/__init__.py @@ -24,7 +24,7 @@ from .skills.registry import BaseRegistry as BaseRegistry from .skills.tools import ToolRegistry as ToolRegistry -__version__ = "3.6.2" +__version__ = "3.6.3" # symbol -> (module_path, attribute_name);首次访问时才 importlib 加载 _LAZY_IMPORTS: dict[str, tuple[str, str]] = { diff --git a/src/Undefined/ai/prompts/current_input.py b/src/Undefined/ai/prompts/current_input.py index 39770e45..b43010fb 100644 --- a/src/Undefined/ai/prompts/current_input.py +++ b/src/Undefined/ai/prompts/current_input.py @@ -90,15 +90,15 @@ def build_current_input_per_message_query_texts( def _history_msg_matches_signature( msg: dict[str, Any], signature: CurrentMessageSignature ) -> bool: + history_message_id = str(msg.get("message_id", "") or "").strip() + if signature.message_id and history_message_id: + return history_message_id == signature.message_id + sig_sender_id = signature.sender_id.strip() sig_content = signature.content.strip() if not sig_sender_id or not sig_content: return False - history_message_id = str(msg.get("message_id", "") or "").strip() - if signature.message_id and history_message_id: - return history_message_id == signature.message_id - last_sender_id = str(msg.get("user_id", "") or "").strip() last_content = str(msg.get("message", "") or "").strip() if last_sender_id != sig_sender_id or last_content != sig_content: diff --git a/src/Undefined/attachments/__init__.py b/src/Undefined/attachments/__init__.py index 4f978ccf..57404444 100644 --- a/src/Undefined/attachments/__init__.py +++ b/src/Undefined/attachments/__init__.py @@ -10,6 +10,11 @@ RegisteredMessageAttachments, RenderedRichMessage, ) +from Undefined.attachments.forward_snapshot import ( + load_forward_snapshot, + save_forward_snapshot, + snapshot_forward_tree, +) from Undefined.attachments.registry import AttachmentRegistry from Undefined.attachments.render import ( dispatch_pending_file_sends, @@ -40,8 +45,11 @@ "attachment_refs_to_xml", "build_attachment_scope", "dispatch_pending_file_sends", + "load_forward_snapshot", "register_message_attachments", "render_message_with_attachments", "render_message_with_pic_placeholders", + "save_forward_snapshot", "scope_from_context", + "snapshot_forward_tree", ] diff --git a/src/Undefined/attachments/forward_snapshot.py b/src/Undefined/attachments/forward_snapshot.py new file mode 100644 index 00000000..5ea8a38f --- /dev/null +++ b/src/Undefined/attachments/forward_snapshot.py @@ -0,0 +1,286 @@ +"""合并转发节点快照缓存。 + +OneBot 协议端可能只允许在收到外层合并转发时读取内层内容;之后再用 +内层 ID 调 ``get_forward_msg`` 可能返回空。这里按会话作用域保存已见节点, +供 ``messages.get_forward_msg`` 在协议端不可回源时回退。 +""" + +from __future__ import annotations + +import asyncio +from datetime import datetime +import hashlib +import logging +from pathlib import Path +from typing import Any, Awaitable, Callable, Mapping + +from Undefined.utils import io +from Undefined.utils.paths import FORWARD_SNAPSHOT_CACHE_DIR + +logger = logging.getLogger(__name__) + +_MAX_RECURSIVE_SNAPSHOT_DEPTH = 3 +_MAX_RECURSIVE_SNAPSHOT_NODES = 50 +_snapshot_locks: dict[str, asyncio.Lock] = {} +_snapshot_lock_users: dict[str, int] = {} +_snapshot_inflight: dict[str, asyncio.Task[None]] = {} + + +def _snapshot_key(scope_key: str, forward_id: str) -> str: + payload = f"{scope_key}\n{forward_id}".encode("utf-8") + return hashlib.sha256(payload).hexdigest() + + +def _snapshot_path(scope_key: str, forward_id: str) -> Path: + return FORWARD_SNAPSHOT_CACHE_DIR / f"{_snapshot_key(scope_key, forward_id)}.json" + + +def _snapshot_lock(scope_key: str, forward_id: str) -> tuple[str, asyncio.Lock]: + key = _snapshot_key(scope_key, forward_id) + lock = _snapshot_locks.get(key) + if lock is None: + lock = asyncio.Lock() + _snapshot_locks[key] = lock + _snapshot_lock_users[key] = _snapshot_lock_users.get(key, 0) + 1 + return key, lock + + +def _release_snapshot_lock(key: str, lock: asyncio.Lock) -> None: + users = _snapshot_lock_users.get(key, 0) - 1 + if users > 0: + _snapshot_lock_users[key] = users + return + _snapshot_lock_users.pop(key, None) + if _snapshot_locks.get(key) is lock and not lock.locked(): + _snapshot_locks.pop(key, None) + + +def _clean_json_value(value: Any) -> Any: + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, Mapping): + cleaned: dict[str, Any] = {} + for raw_key, raw_value in value.items(): + key = str(raw_key).strip() + if not key: + continue + cleaned[key] = _clean_json_value(raw_value) + return cleaned + if isinstance(value, (list, tuple)): + return [_clean_json_value(item) for item in value] + return str(value) + + +def normalize_forward_nodes_for_snapshot(nodes: Any) -> list[dict[str, Any]]: + """把 OneBot 返回的合并转发节点清洗为可持久化列表。""" + if isinstance(nodes, Mapping): + messages = nodes.get("messages") + raw_nodes = messages if isinstance(messages, list) else [] + elif isinstance(nodes, list): + raw_nodes = nodes + else: + raw_nodes = [] + + cleaned_nodes: list[dict[str, Any]] = [] + for node in raw_nodes: + if not isinstance(node, Mapping): + continue + cleaned = _clean_json_value(node) + if isinstance(cleaned, dict): + cleaned_nodes.append(cleaned) + return cleaned_nodes + + +async def save_forward_snapshot( + *, + scope_key: str, + forward_id: str, + nodes: Any, +) -> bool: + """保存合并转发节点快照;无有效节点时不写入。""" + normalized_scope = str(scope_key or "").strip() + normalized_forward_id = str(forward_id or "").strip() + if not normalized_scope or not normalized_forward_id: + return False + + normalized_nodes = normalize_forward_nodes_for_snapshot(nodes) + if not normalized_nodes: + return False + + payload = { + "scope_key": normalized_scope, + "forward_id": normalized_forward_id, + "created_at": datetime.now().isoformat(timespec="seconds"), + "nodes": normalized_nodes, + } + await io.write_json( + _snapshot_path(normalized_scope, normalized_forward_id), + payload, + use_lock=True, + ) + return True + + +async def load_forward_snapshot( + *, + scope_key: str, + forward_id: str, +) -> list[dict[str, Any]]: + """读取合并转发节点快照;不存在或格式不符时返回空列表。""" + normalized_scope = str(scope_key or "").strip() + normalized_forward_id = str(forward_id or "").strip() + if not normalized_scope or not normalized_forward_id: + return [] + + raw = await io.read_json( + _snapshot_path(normalized_scope, normalized_forward_id), + use_lock=False, + ) + if not isinstance(raw, Mapping): + return [] + if str(raw.get("scope_key", "") or "") != normalized_scope: + return [] + if str(raw.get("forward_id", "") or "") != normalized_forward_id: + return [] + return normalize_forward_nodes_for_snapshot(raw.get("nodes")) + + +def _extract_forward_id(data: Mapping[str, Any]) -> str: + forward_id = data.get("id") or data.get("resid") or data.get("message_id") + return str(forward_id).strip() if forward_id is not None else "" + + +def _normalize_message_segments(message: Any) -> list[Mapping[str, Any]]: + if isinstance(message, list): + return [item for item in message if isinstance(item, Mapping)] + if isinstance(message, Mapping): + return [message] + if isinstance(message, str): + return [{"type": "text", "data": {"text": message}}] + return [] + + +def _iter_nested_forward_ids(nodes: list[dict[str, Any]]) -> list[str]: + forward_ids: list[str] = [] + seen: set[str] = set() + for node in nodes: + raw_message = ( + node.get("content") or node.get("message") or node.get("raw_message") + ) + for segment in _normalize_message_segments(raw_message): + if str(segment.get("type", "") or "").strip().lower() != "forward": + continue + raw_data = segment.get("data", {}) + data = raw_data if isinstance(raw_data, Mapping) else {} + forward_id = _extract_forward_id(data) + if forward_id and forward_id not in seen: + seen.add(forward_id) + forward_ids.append(forward_id) + return forward_ids + + +async def snapshot_forward_tree( + *, + scope_key: str, + forward_id: str, + get_forward_messages: Callable[[str], Awaitable[Any]], + max_depth: int = _MAX_RECURSIVE_SNAPSHOT_DEPTH, + max_nodes: int = _MAX_RECURSIVE_SNAPSHOT_NODES, +) -> None: + """递归抓取并保存当前可访问的合并转发树。 + + 同一 ``scope_key + forward_id`` 在进程内会合并并发抓取,避免多个消息或 + 工具调用同时触发同一层 OneBot 请求。 + """ + normalized_scope = str(scope_key or "").strip() + normalized_forward_id = str(forward_id or "").strip() + if not normalized_scope or not normalized_forward_id: + return + if max_depth < 0 or max_nodes <= 0: + return + + root_key = _snapshot_key(normalized_scope, normalized_forward_id) + inflight = _snapshot_inflight.get(root_key) + if inflight is not None and not inflight.done(): + await asyncio.shield(inflight) + return + + async def _run() -> None: + visited: set[str] = set() + remaining = max_nodes + + async def _walk(current_forward_id: str, depth: int) -> None: + nonlocal remaining + normalized_current_id = str(current_forward_id or "").strip() + if not normalized_current_id: + return + if depth > max_depth or remaining <= 0: + return + if normalized_current_id in visited: + return + visited.add(normalized_current_id) + remaining -= 1 + + lock_key, lock = _snapshot_lock(normalized_scope, normalized_current_id) + try: + async with lock: + nodes: list[dict[str, Any]] = [] + try: + nodes = await load_forward_snapshot( + scope_key=normalized_scope, + forward_id=normalized_current_id, + ) + except Exception: + logger.debug( + "读取合并转发快照失败,将尝试回源: id=%s", + normalized_current_id, + exc_info=True, + ) + if not nodes: + try: + raw_nodes = await get_forward_messages( + normalized_current_id + ) + except Exception: + logger.debug( + "递归缓存合并转发失败: id=%s", + normalized_current_id, + exc_info=True, + ) + return + nodes = normalize_forward_nodes_for_snapshot(raw_nodes) + if not nodes: + return + try: + await save_forward_snapshot( + scope_key=normalized_scope, + forward_id=normalized_current_id, + nodes=nodes, + ) + except Exception: + logger.debug( + "写入合并转发快照失败: id=%s", + normalized_current_id, + exc_info=True, + ) + finally: + _release_snapshot_lock(lock_key, lock) + + if depth >= max_depth: + return + for nested_forward_id in _iter_nested_forward_ids(nodes): + if remaining <= 0: + break + await _walk(nested_forward_id, depth + 1) + + await _walk(normalized_forward_id, 0) + + task = asyncio.create_task(_run()) + _snapshot_inflight[root_key] = task + + def _forget(done: asyncio.Task[None]) -> None: + if _snapshot_inflight.get(root_key) is done: + _snapshot_inflight.pop(root_key, None) + + task.add_done_callback(_forget) + await asyncio.shield(task) diff --git a/src/Undefined/attachments/segments.py b/src/Undefined/attachments/segments.py index f469fc33..b79fed55 100644 --- a/src/Undefined/attachments/segments.py +++ b/src/Undefined/attachments/segments.py @@ -16,6 +16,10 @@ import httpx +from Undefined.attachments.forward_snapshot import ( + load_forward_snapshot, + snapshot_forward_tree, +) from Undefined.attachments.models import RegisteredMessageAttachments from Undefined.utils.paths import WEBUI_FILE_CACHE_DIR from Undefined.utils.xml import escape_xml_attr @@ -362,6 +366,8 @@ async def register_message_attachments( | None = None, register_forward_refs: bool = False, expand_forward_attachments: bool = True, + snapshot_forward_messages: bool = False, + snapshot_nested_forward_messages: bool = False, ) -> RegisteredMessageAttachments: """扫描消息段并将图片/文件注册到 ``AttachmentRegistry``。 @@ -373,6 +379,8 @@ async def register_message_attachments( get_forward_messages: 可选,拉取合并转发子消息。 register_forward_refs: 是否将顶层合并转发注册为 ``forward_`` 引用。 expand_forward_attachments: 是否递归扫描合并转发内的附件。 + snapshot_forward_messages: 是否读取合并转发并递归缓存可访问的节点快照。 + snapshot_nested_forward_messages: 向后兼容参数;递归缓存已覆盖内层转发。 Returns: 已注册附件引用与归一化纯文本。 @@ -392,6 +400,22 @@ async def register_message_attachments( ) visited_forward_ids: set[str] = set() + should_snapshot_forward_messages = ( + snapshot_forward_messages or snapshot_nested_forward_messages + ) + + async def _fetch_forward_nodes(forward_id: str) -> list[Mapping[str, Any]]: + if get_forward_messages is None: + return [] + try: + return _normalize_forward_nodes(await get_forward_messages(forward_id)) + except Exception as exc: + logger.debug( + "[AttachmentRegistry] forward resolver failed: id=%s err=%s", + forward_id, + exc, + ) + return [] async def _collect_from_segments( current_segments: Sequence[Mapping[str, Any]], @@ -543,6 +567,7 @@ async def _collect_from_segments( elif type_ == "forward": # 合并转发递归展开,深度上限防止无限嵌套 forward_id = _extract_forward_id(data) + forward_nodes: Sequence[Mapping[str, Any]] = [] if register_forward_refs and depth == 0 and forward_id: register_forward = getattr( registry, @@ -559,26 +584,56 @@ async def _collect_from_segments( ) ref = record.prompt_ref() + should_fetch_forward = ( + get_forward_messages is not None + and forward_id + and forward_id not in visited_forward_ids + and ( + should_snapshot_forward_messages + or ( + expand_forward_attachments + and depth < _FORWARD_ATTACHMENT_MAX_DEPTH + ) + ) + ) + if should_fetch_forward: + assert get_forward_messages is not None + visited_forward_ids.add(forward_id) + if should_snapshot_forward_messages: + try: + await snapshot_forward_tree( + scope_key=scope_key, + forward_id=forward_id, + get_forward_messages=get_forward_messages, + ) + except Exception: + logger.debug( + "[AttachmentRegistry] forward snapshot failed: id=%s", + forward_id, + exc_info=True, + ) + forward_nodes = await load_forward_snapshot( + scope_key=scope_key, + forward_id=forward_id, + ) + if not forward_nodes: + forward_nodes = await _fetch_forward_nodes(forward_id) + else: + forward_nodes = await _fetch_forward_nodes(forward_id) + if ( expand_forward_attachments and get_forward_messages is not None and depth < _FORWARD_ATTACHMENT_MAX_DEPTH and forward_id - and forward_id not in visited_forward_ids ): - visited_forward_ids.add(forward_id) - try: - nodes = _normalize_forward_nodes( - await get_forward_messages(forward_id) - ) - except Exception as exc: - logger.debug( - "[AttachmentRegistry] forward resolver failed: id=%s err=%s", - forward_id, - exc, - ) - nodes = [] - for node_index, node in enumerate(nodes): + if not forward_nodes: + if forward_id not in visited_forward_ids: + visited_forward_ids.add(forward_id) + forward_nodes = await _fetch_forward_nodes(forward_id) + if not forward_nodes: + continue + for node_index, node in enumerate(forward_nodes): raw_message = ( node.get("content") or node.get("message") diff --git a/src/Undefined/handlers/message_flow.py b/src/Undefined/handlers/message_flow.py index 7cc0961d..3993dd14 100644 --- a/src/Undefined/handlers/message_flow.py +++ b/src/Undefined/handlers/message_flow.py @@ -299,6 +299,7 @@ async def _collect_message_attachments( else None, register_forward_refs=True, expand_forward_attachments=False, + snapshot_forward_messages=True, ) attachments = result.attachments # 命中表情库时为 AI 上下文补充 [表情包] 描述 diff --git a/src/Undefined/skills/agents/file_analysis_agent/README.md b/src/Undefined/skills/agents/file_analysis_agent/README.md index d1c31224..4741df46 100644 --- a/src/Undefined/skills/agents/file_analysis_agent/README.md +++ b/src/Undefined/skills/agents/file_analysis_agent/README.md @@ -12,6 +12,7 @@ 运行机制: - 由 `AgentRegistry` 自动发现并注册 - 通过 `prompt` 输入任务描述并调用内部工具 +- 内部附件 UID(`pic_xxx` / `file_xxx`)由工具按当前会话作用域解析;多模态分析可直接传 UID,其他解析工具先用 `download_file` 转成本地路径 - PDF 文字提取走 `extract_pdf`;扫描版、图表、版式或指定页码范围视觉分析走 `describe_pdf_page` 开发提示: diff --git a/src/Undefined/skills/agents/file_analysis_agent/prompt.md b/src/Undefined/skills/agents/file_analysis_agent/prompt.md index b2476d96..2e2ac8ca 100644 --- a/src/Undefined/skills/agents/file_analysis_agent/prompt.md +++ b/src/Undefined/skills/agents/file_analysis_agent/prompt.md @@ -6,7 +6,7 @@ - 超大文件、乱码、缺页、格式损坏或工具无法解析时,如实说明影响,并尽量给出已能提取的部分。 附件输入规则: -- 用户上下文里有内部附件 UID(如 `pic_xxx` / `file_xxx`)时,优先直接使用该 UID。 +- 用户上下文里有内部附件 UID(如 `pic_xxx` / `file_xxx`)时,优先使用该 UID,不要改写成文件名或 URL。 - 没有内部 UID 时,才使用显式 URL、legacy `file_id`、arXiv 标识或 Bilibili 标识。 - 不要臆造、改写或猜测附件 UID。 @@ -15,6 +15,7 @@ - 如果文件源是 Bilibili BV 号、AV 号、B 站视频链接或 b23.tv 短链,先调用共享工具 `bilibili_video`,设置 `output_mode="uid"`,拿到 `` 后再按普通视频 UID 下载和分析。 - 已经给出内部附件 UID 时,不要再调用 arXiv/Bilibili 获取工具。 - 根据用户目标选择合适工具:文本读取、文件类型检测、PDF/Office/表格/代码/压缩包/多模态分析都按内容类型处理。 +- `analyze_multimodal` 支持直接传入内部附件 UID;其他需要本地路径的工具应先调用 `download_file` 将 UID 转成本地临时文件路径。 - PDF 文本和元数据优先用 `extract_pdf`;扫描版、图表、版式、公式图、截图式页面或用户指定页码时,用 `describe_pdf_page` 做逐页视觉分析。 - `describe_pdf_page` 支持页码范围,例如 `3`、`3-5`、`3,5,8-10`;单次最多 5 页,范围过大时请缩小。 - 对图片和多模态文件,重点报告客观可见信息,例如文字、UI、场景、人物、角色、应用/游戏名称和关键元素。 diff --git a/src/Undefined/skills/agents/file_analysis_agent/tools/analyze_multimodal/handler.py b/src/Undefined/skills/agents/file_analysis_agent/tools/analyze_multimodal/handler.py index 707653df..1054df0f 100644 --- a/src/Undefined/skills/agents/file_analysis_agent/tools/analyze_multimodal/handler.py +++ b/src/Undefined/skills/agents/file_analysis_agent/tools/analyze_multimodal/handler.py @@ -6,6 +6,77 @@ logger = logging.getLogger(__name__) +def _is_attachment_uid(value: str) -> bool: + text = value.strip() + return text.startswith(("pic_", "file_")) + + +def _local_path_from_record(record: Any) -> Path | None: + local_path_raw = str(getattr(record, "local_path", "") or "").strip() + if not local_path_raw: + return None + local_path = ( + local_path_raw[7:] if local_path_raw.startswith("file://") else local_path_raw + ) + path = Path(local_path) + return path if path.is_file() else None + + +def _scope_key_from_context(context: Dict[str, Any]) -> str | None: + scope_key = str(context.get("scope_key") or "").strip() + if scope_key: + return scope_key + get_scope_from_context = context.get("get_scope_from_context") + if callable(get_scope_from_context): + scope_key = str(get_scope_from_context(context) or "").strip() + return scope_key or None + + +async def _resolve_attachment_uid_path( + file_path: str, + context: Dict[str, Any], +) -> tuple[Path | None, str | None]: + if not _is_attachment_uid(file_path): + return None, None + + attachment_registry = context.get("attachment_registry") + if attachment_registry is None: + return None, f"错误:无法解析附件 UID {file_path}:缺少 attachment_registry" + + scope_key = _scope_key_from_context(context) + if not scope_key: + return None, f"错误:无法解析附件 UID {file_path}:缺少会话作用域" + + try: + load = getattr(attachment_registry, "load", None) + if load is not None: + await load() + resolve_async = getattr(attachment_registry, "resolve_async", None) + if resolve_async is not None: + record = await resolve_async(file_path, scope_key) + else: + record = attachment_registry.resolve(file_path, scope_key) + except Exception: + logger.exception("附件 UID 解析失败: %s", file_path) + return None, f"错误:附件 UID 解析失败:{file_path}" + + if record is None: + return None, f"错误:附件 UID 不存在或无权访问:{file_path}" + + ensure_local_file = getattr(attachment_registry, "ensure_local_file", None) + if ensure_local_file is not None: + try: + record = await ensure_local_file(record) + except Exception: + logger.exception("附件 UID 本地化失败: %s", file_path) + return None, f"错误:附件 UID 本地化失败:{file_path}" + + local_path = _local_path_from_record(record) + if local_path is None: + return None, f"错误:附件 UID 无法解析到本地文件:{file_path}" + return local_path, None + + def _format_result(result: dict[str, str]) -> str: """将分析结果字典格式化为文本。""" lines: list[str] = [] @@ -61,7 +132,12 @@ async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str: path = Path(file_path) if not path.exists(): - return f"错误:文件不存在 {file_path}" + resolved_path, error = await _resolve_attachment_uid_path(file_path, context) + if error: + return error + if resolved_path is None: + return f"错误:文件不存在 {file_path}" + path = resolved_path if not path.is_file(): return f"错误:{file_path} 不是文件" diff --git a/src/Undefined/skills/toolsets/messages/get_forward_msg/handler.py b/src/Undefined/skills/toolsets/messages/get_forward_msg/handler.py index 29ad4a69..be4895d1 100644 --- a/src/Undefined/skills/toolsets/messages/get_forward_msg/handler.py +++ b/src/Undefined/skills/toolsets/messages/get_forward_msg/handler.py @@ -5,6 +5,11 @@ from typing import Any, Mapping from Undefined.attachments import build_attachment_scope, register_message_attachments +from Undefined.attachments.forward_snapshot import ( + load_forward_snapshot, + save_forward_snapshot, + snapshot_forward_tree, +) from Undefined.attachments.segments import ( forward_ref_to_tag, normalize_message_segments, @@ -42,13 +47,13 @@ def _format_time(raw_time: Any) -> str: return str(raw_time) -def _normalize_nodes(raw_nodes: Any) -> list[Mapping[str, Any]]: +def _normalize_nodes(raw_nodes: Any) -> list[dict[str, Any]]: if isinstance(raw_nodes, list): - return [node for node in raw_nodes if isinstance(node, Mapping)] + return [dict(node) for node in raw_nodes if isinstance(node, Mapping)] if isinstance(raw_nodes, Mapping): messages = raw_nodes.get("messages") if isinstance(messages, list): - return [node for node in messages if isinstance(node, Mapping)] + return [dict(node) for node in messages if isinstance(node, Mapping)] return [] @@ -86,6 +91,50 @@ def _raw_forward_id_from_record(uid_or_id: str, context: Mapping[str, Any]) -> s return str(getattr(record, "source_ref", "") or "").strip() +def _resolve_forward_record( + uid_or_id: str, + context: Mapping[str, Any], +) -> tuple[str, str | None, Any | None]: + """解析工具入参对应的 raw forward id、scope 和注册记录。""" + if not uid_or_id.startswith("forward_"): + return uid_or_id, _resolve_scope_key(context), None + + registry = context.get("attachment_registry") + scope_key = _resolve_scope_key(context) + if registry is None or not scope_key: + return "", scope_key, None + resolve = getattr(registry, "resolve", None) + if not callable(resolve): + return "", scope_key, None + record = resolve(uid_or_id, scope_key) + if record is None or getattr(record, "media_type", "") != "forward": + return "", scope_key, None + return str(getattr(record, "source_ref", "") or "").strip(), scope_key, record + + +def _format_unavailable_message( + *, + message_id: str, + raw_forward_id: str, + record: Any | None, +) -> str: + lines = [ + "未能获取到合并转发消息的内容或内容为空。", + "这通常表示协议端当前无法回源该层合并转发,常见原因包括内层转发不可二次读取、资源过期或权限受限。", + f"请求 ID: {message_id}", + f"源 ID: {raw_forward_id}", + ] + if record is not None: + segment_data = getattr(record, "segment_data", {}) or {} + if isinstance(segment_data, Mapping) and segment_data: + details = ", ".join( + f"{key}={value}" for key, value in sorted(segment_data.items()) + ) + if details: + lines.append(f"原始字段: {details}") + return "\n".join(lines) + + async def _register_node_segments( *, segments: list[Mapping[str, Any]], @@ -130,9 +179,10 @@ async def _register_node_segments( segments=segments, scope_key=scope_key, resolve_image_url=resolve_image_url, - get_forward_messages=None, + get_forward_messages=context.get("get_forward_msg_callback"), register_forward_refs=True, expand_forward_attachments=False, + snapshot_forward_messages=True, ) refs = list(result.attachments) + list(result.forward_refs) return result.normalized_text, refs @@ -147,7 +197,7 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: if not callable(get_forward_msg_callback): return "错误:获取合并转发消息的回调未设置" - raw_forward_id = _raw_forward_id_from_record(message_id, context) + raw_forward_id, scope_key, record = _resolve_forward_record(message_id, context) if not raw_forward_id: return f"错误:合并转发 UID 不可用或不属于当前会话:{message_id}" @@ -156,14 +206,73 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: # 保留参数用于向后兼容和未来扩展;当前实现默认首层,不递归展开。 _ = _safe_int(args.get("max_depth"), 1, minimum=1, maximum=5) - try: - nodes = _normalize_nodes(await get_forward_msg_callback(raw_forward_id)) - except Exception as exc: - logger.exception("获取合并转发消息失败: id=%s", raw_forward_id) - return f"获取合并转发消息失败:{exc}" + nodes: list[dict[str, Any]] = [] + source_note = "" + if scope_key: + try: + load_nodes = await load_forward_snapshot( + scope_key=scope_key, + forward_id=raw_forward_id, + ) + if load_nodes: + nodes = load_nodes + source_note = "(来自本地快照)" + except Exception: + logger.debug("读取合并转发快照失败: id=%s", raw_forward_id, exc_info=True) if not nodes: - return "未能获取到合并转发消息的内容或内容为空" + try: + nodes = _normalize_nodes(await get_forward_msg_callback(raw_forward_id)) + if nodes and scope_key: + try: + await save_forward_snapshot( + scope_key=scope_key, + forward_id=raw_forward_id, + nodes=nodes, + ) + await snapshot_forward_tree( + scope_key=scope_key, + forward_id=raw_forward_id, + get_forward_messages=get_forward_msg_callback, + ) + nodes = ( + await load_forward_snapshot( + scope_key=scope_key, + forward_id=raw_forward_id, + ) + or nodes + ) + except Exception: + logger.debug( + "递归保存合并转发快照失败: id=%s", + raw_forward_id, + exc_info=True, + ) + except Exception as exc: + logger.exception("获取合并转发消息失败: id=%s", raw_forward_id) + if scope_key: + try: + nodes = await load_forward_snapshot( + scope_key=scope_key, + forward_id=raw_forward_id, + ) + if nodes: + source_note = "(来自本地快照,OneBot 回源失败)" + except Exception: + logger.debug( + "OneBot 失败后读取合并转发快照也失败: id=%s", + raw_forward_id, + exc_info=True, + ) + if not nodes: + return f"获取合并转发消息失败:{exc}" + + if not nodes: + return _format_unavailable_message( + message_id=message_id, + raw_forward_id=raw_forward_id, + record=record, + ) window = nodes[offset : offset + limit] if not window: @@ -232,7 +341,7 @@ async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: next_offset = offset + len(window) page_note = ( - f"合并转发 {message_id}(源 ID: {raw_forward_id})节点 " + f"合并转发 {message_id}(源 ID: {raw_forward_id}){source_note}节点 " f"{offset + 1}-{next_offset}/{len(nodes)}" ) if next_offset < len(nodes): diff --git a/src/Undefined/utils/paths.py b/src/Undefined/utils/paths.py index a077bed3..330672d9 100644 --- a/src/Undefined/utils/paths.py +++ b/src/Undefined/utils/paths.py @@ -10,6 +10,7 @@ RENDER_CACHE_DIR: Path = CACHE_DIR / "render" IMAGE_CACHE_DIR: Path = CACHE_DIR / "images" ATTACHMENT_CACHE_DIR: Path = CACHE_DIR / "attachments" +FORWARD_SNAPSHOT_CACHE_DIR: Path = CACHE_DIR / "forward_snapshots" DOWNLOAD_CACHE_DIR: Path = CACHE_DIR / "downloads" TEXT_FILE_CACHE_DIR: Path = CACHE_DIR / "text_files" URL_FILE_CACHE_DIR: Path = CACHE_DIR / "url_files" diff --git a/tests/test_attachments.py b/tests/test_attachments.py index a025cd4a..8a4dda66 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -10,11 +10,14 @@ import httpx import pytest +from Undefined.attachments import forward_snapshot +from Undefined.attachments import segments as attachment_segments from Undefined.attachments import ( AttachmentRecord, AttachmentRegistry, append_attachment_text, attachment_refs_to_xml, + load_forward_snapshot, register_message_attachments, render_message_with_pic_placeholders, ) @@ -674,6 +677,139 @@ async def _fake_get_forward(_forward_id: str) -> list[dict[str, object]]: assert record.source_ref == "forward-1" +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("snapshot_forward_messages", "snapshot_nested_forward_messages"), + [ + (True, False), + (False, True), + ], +) +async def test_register_message_attachments_snapshots_forward_tree_without_expansion( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + snapshot_forward_messages: bool, + snapshot_nested_forward_messages: bool, +) -> None: + monkeypatch.setattr( + forward_snapshot, + "FORWARD_SNAPSHOT_CACHE_DIR", + tmp_path / "forward_snapshots", + ) + monkeypatch.setattr(forward_snapshot, "_snapshot_locks", {}) + monkeypatch.setattr(forward_snapshot, "_snapshot_lock_users", {}) + monkeypatch.setattr(forward_snapshot, "_snapshot_inflight", {}) + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + calls: list[str] = [] + + async def _fake_get_forward(forward_id: str) -> list[dict[str, object]]: + calls.append(forward_id) + if forward_id == "outer-forward": + return [ + { + "message": [ + {"type": "text", "data": {"text": "外层内容"}}, + {"type": "forward", "data": {"id": "inner-forward"}}, + ] + } + ] + if forward_id == "inner-forward": + return [ + { + "message": [ + {"type": "text", "data": {"text": "内层内容"}}, + ] + } + ] + return [] + + result = await register_message_attachments( + registry=registry, + segments=[{"type": "forward", "data": {"id": "outer-forward"}}], + scope_key="group:10001", + get_forward_messages=_fake_get_forward, + register_forward_refs=True, + expand_forward_attachments=False, + snapshot_forward_messages=snapshot_forward_messages, + snapshot_nested_forward_messages=snapshot_nested_forward_messages, + ) + + assert calls == ["outer-forward", "inner-forward"] + assert result.attachments == [] + assert len(result.forward_refs) == 1 + assert result.normalized_text == f'' + assert "外层内容" not in result.normalized_text + assert await load_forward_snapshot( + scope_key="group:10001", + forward_id="inner-forward", + ) == [ + { + "message": [ + {"type": "text", "data": {"text": "内层内容"}}, + ] + } + ] + + +@pytest.mark.asyncio +async def test_register_message_attachments_empty_snapshot_falls_back_to_resolver( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + encoded_image = base64.b64encode(_PNG_BYTES).decode("ascii") + calls: list[str] = [] + + async def _noop_snapshot_forward_tree(**_kwargs: Any) -> None: + return None + + async def _empty_load_forward_snapshot(**_kwargs: Any) -> list[dict[str, Any]]: + return [] + + async def _fake_get_forward(forward_id: str) -> list[dict[str, object]]: + calls.append(forward_id) + return [ + { + "message": [ + { + "type": "image", + "data": {"file": f"base64://{encoded_image}"}, + } + ] + } + ] + + monkeypatch.setattr( + attachment_segments, + "snapshot_forward_tree", + _noop_snapshot_forward_tree, + ) + monkeypatch.setattr( + attachment_segments, + "load_forward_snapshot", + _empty_load_forward_snapshot, + ) + + result = await register_message_attachments( + registry=registry, + segments=[{"type": "forward", "data": {"id": "outer-forward"}}], + scope_key="group:10001", + get_forward_messages=_fake_get_forward, + expand_forward_attachments=True, + snapshot_forward_messages=True, + ) + + assert calls == ["outer-forward"] + assert len(result.attachments) == 1 + assert result.attachments[0]["uid"].startswith("pic_") + + def test_attachment_refs_to_xml_skips_forward_refs() -> None: xml = attachment_refs_to_xml( [ diff --git a/tests/test_file_analysis_attachment_uid.py b/tests/test_file_analysis_attachment_uid.py index 06da1334..2840fad4 100644 --- a/tests/test_file_analysis_attachment_uid.py +++ b/tests/test_file_analysis_attachment_uid.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio from pathlib import Path from typing import Any @@ -9,6 +10,9 @@ from Undefined.skills.agents.file_analysis_agent.tools.download_file import ( handler as download_file_handler, ) +from Undefined.skills.agents.file_analysis_agent.tools.analyze_multimodal import ( + handler as analyze_multimodal_handler, +) from Undefined.utils.io import write_bytes from Undefined.utils.paths import ensure_dir @@ -28,6 +32,57 @@ def _download_context( } +class _FakeAiClient: + def __init__(self) -> None: + self.analyze_calls: list[dict[str, str]] = [] + self.saved_history: list[dict[str, str]] = [] + + def get_media_history(self, media_key: str) -> list[dict[str, str]]: + _ = media_key + return [] + + async def analyze_multimodal( + self, + media_url: str, + *, + media_type: str, + prompt_extra: str, + ) -> dict[str, str]: + self.analyze_calls.append( + { + "media_url": media_url, + "media_type": media_type, + "prompt_extra": prompt_extra, + } + ) + return {"description": "image analyzed"} + + async def save_media_history( + self, + media_key: str, + question: str, + answer: str, + ) -> None: + self.saved_history.append( + {"media_key": media_key, "question": question, "answer": answer} + ) + + +def _analysis_context( + registry: AttachmentRegistry, + ai_client: _FakeAiClient, + *, + user_id: int = 12345, +) -> dict[str, Any]: + return { + "attachment_registry": registry, + "request_type": "private", + "user_id": user_id, + "get_scope_from_context": scope_from_context, + "ai_client": ai_client, + } + + @pytest.mark.asyncio async def test_download_file_supports_internal_attachment_uid( tmp_path: Path, @@ -149,3 +204,67 @@ async def test_download_file_uses_random_name_for_unsafe_attachment_name( assert downloaded.name.startswith("image_") assert len(downloaded.name) < 64 assert downloaded.read_bytes() == b"image bytes" + + +@pytest.mark.asyncio +async def test_analyze_multimodal_supports_internal_attachment_uid( + tmp_path: Path, +) -> None: + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + record = await registry.register_bytes( + "private:12345", + b"image bytes", + kind="image", + display_name="demo.jpg", + source_kind="test", + ) + ai_client = _FakeAiClient() + + result = await analyze_multimodal_handler.execute( + { + "file_path": record.uid, + "media_type": "image", + "prompt": "描述图片", + }, + _analysis_context(registry, ai_client), + ) + + assert result == "描述:image analyzed" + assert len(ai_client.analyze_calls) == 1 + call = ai_client.analyze_calls[0] + assert call["media_url"] != record.uid + media_path = Path(call["media_url"]) + assert await asyncio.to_thread(media_path.is_file) + assert await asyncio.to_thread(media_path.read_bytes) == b"image bytes" + assert call["media_type"] == "image" + assert call["prompt_extra"] == "描述图片" + assert ai_client.saved_history + + +@pytest.mark.asyncio +async def test_analyze_multimodal_rejects_attachment_uid_from_other_scope( + tmp_path: Path, +) -> None: + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + record = await registry.register_bytes( + "private:12345", + b"image bytes", + kind="image", + display_name="demo.jpg", + source_kind="test", + ) + ai_client = _FakeAiClient() + + result = await analyze_multimodal_handler.execute( + {"file_path": record.uid, "media_type": "image"}, + _analysis_context(registry, ai_client, user_id=99999), + ) + + assert result == f"错误:附件 UID 不存在或无权访问:{record.uid}" + assert ai_client.analyze_calls == [] diff --git a/tests/test_forward_snapshot.py b/tests/test_forward_snapshot.py new file mode 100644 index 00000000..4d6f8f0b --- /dev/null +++ b/tests/test_forward_snapshot.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import Any + +import pytest + +from Undefined.attachments import forward_snapshot +from Undefined.attachments.forward_snapshot import ( + load_forward_snapshot, + normalize_forward_nodes_for_snapshot, + save_forward_snapshot, + snapshot_forward_tree, +) + + +class _OddValue: + def __str__(self) -> str: + return "odd-value" + + +def _reset_forward_snapshot_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + forward_snapshot, + "FORWARD_SNAPSHOT_CACHE_DIR", + tmp_path / "forward_snapshots", + ) + monkeypatch.setattr(forward_snapshot, "_snapshot_locks", {}) + monkeypatch.setattr(forward_snapshot, "_snapshot_lock_users", {}) + monkeypatch.setattr(forward_snapshot, "_snapshot_inflight", {}) + + +@pytest.mark.asyncio +async def test_forward_snapshot_round_trip( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_forward_snapshot_state(tmp_path, monkeypatch) + + saved = await save_forward_snapshot( + scope_key="group:10001", + forward_id="raw-forward", + nodes=[ + { + "sender": {"nickname": "Alice", "user_id": 123}, + "message": [{"type": "text", "data": {"text": "hello"}}], + } + ], + ) + + assert saved is True + loaded = await load_forward_snapshot( + scope_key="group:10001", + forward_id="raw-forward", + ) + assert loaded == [ + { + "sender": {"nickname": "Alice", "user_id": 123}, + "message": [{"type": "text", "data": {"text": "hello"}}], + } + ] + + +@pytest.mark.asyncio +async def test_forward_snapshot_is_scoped( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_forward_snapshot_state(tmp_path, monkeypatch) + + await save_forward_snapshot( + scope_key="group:10001", + forward_id="raw-forward", + nodes=[{"message": [{"type": "text", "data": {"text": "group"}}]}], + ) + + assert ( + await load_forward_snapshot(scope_key="group:20002", forward_id="raw-forward") + == [] + ) + + +def test_normalize_forward_nodes_for_snapshot_cleans_values() -> None: + nodes: list[dict[str, Any]] = [ + { + "message": [{"type": "text", "data": {"text": _OddValue()}}], + "ignored": object(), + } + ] + + assert normalize_forward_nodes_for_snapshot(nodes) == [ + { + "message": [{"type": "text", "data": {"text": "odd-value"}}], + "ignored": str(nodes[0]["ignored"]), + } + ] + + +@pytest.mark.asyncio +async def test_snapshot_forward_tree_recursively_saves_nested_forwards( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_forward_snapshot_state(tmp_path, monkeypatch) + calls: list[str] = [] + + async def _get_forward(forward_id: str) -> list[dict[str, Any]]: + calls.append(forward_id) + if forward_id == "outer": + return [ + { + "message": [ + {"type": "text", "data": {"text": "外层"}}, + {"type": "forward", "data": {"id": "inner"}}, + ] + } + ] + if forward_id == "inner": + return [ + { + "message": [ + {"type": "text", "data": {"text": "内层"}}, + ] + } + ] + return [] + + await snapshot_forward_tree( + scope_key="group:10001", + forward_id="outer", + get_forward_messages=_get_forward, + ) + + assert calls == ["outer", "inner"] + assert await load_forward_snapshot( + scope_key="group:10001", + forward_id="outer", + ) == [ + { + "message": [ + {"type": "text", "data": {"text": "外层"}}, + {"type": "forward", "data": {"id": "inner"}}, + ] + } + ] + assert await load_forward_snapshot( + scope_key="group:10001", + forward_id="inner", + ) == [ + { + "message": [ + {"type": "text", "data": {"text": "内层"}}, + ] + } + ] + assert forward_snapshot._snapshot_locks == {} + assert forward_snapshot._snapshot_lock_users == {} + assert forward_snapshot._snapshot_inflight == {} + + +@pytest.mark.asyncio +async def test_snapshot_forward_tree_coalesces_concurrent_root_fetches( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _reset_forward_snapshot_state(tmp_path, monkeypatch) + calls = 0 + release = asyncio.Event() + + async def _get_forward(forward_id: str) -> list[dict[str, Any]]: + nonlocal calls + calls += 1 + assert forward_id == "outer" + await release.wait() + return [{"message": [{"type": "text", "data": {"text": "内容"}}]}] + + first = asyncio.create_task( + snapshot_forward_tree( + scope_key="group:10001", + forward_id="outer", + get_forward_messages=_get_forward, + ) + ) + await asyncio.sleep(0) + second = asyncio.create_task( + snapshot_forward_tree( + scope_key="group:10001", + forward_id="outer", + get_forward_messages=_get_forward, + ) + ) + await asyncio.sleep(0) + release.set() + + await asyncio.gather(first, second) + + assert calls == 1 + assert forward_snapshot._snapshot_locks == {} + assert forward_snapshot._snapshot_lock_users == {} + assert forward_snapshot._snapshot_inflight == {} diff --git a/tests/test_get_forward_msg_tool.py b/tests/test_get_forward_msg_tool.py index 91c1b5ff..e71cdbc2 100644 --- a/tests/test_get_forward_msg_tool.py +++ b/tests/test_get_forward_msg_tool.py @@ -6,6 +6,7 @@ import pytest +from Undefined.attachments import forward_snapshot from Undefined.attachments import AttachmentRegistry from Undefined.skills.toolsets.messages.get_forward_msg.handler import execute @@ -23,7 +24,13 @@ @pytest.mark.asyncio async def test_get_forward_msg_accepts_forward_uid_and_registers_nested_refs( tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, ) -> None: + monkeypatch.setattr( + forward_snapshot, + "FORWARD_SNAPSHOT_CACHE_DIR", + tmp_path / "forward_snapshots", + ) registry = AttachmentRegistry( registry_path=tmp_path / "attachment_registry.json", cache_dir=tmp_path / "attachments", @@ -59,12 +66,11 @@ async def _get_forward(forward_id: str) -> list[dict[str, Any]]: }, ) - assert seen_ids == ["raw-forward-1"] + assert seen_ids == ["raw-forward-1", "raw-forward-2"] assert "节点 1-1/1" in result assert "第一层" in result assert ' list[dict[str, Any]]: assert "n1" in result assert "n0" not in result assert "offset=2" in result + + +@pytest.mark.asyncio +async def test_get_forward_msg_uses_snapshot_when_nested_forward_becomes_unavailable( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + forward_snapshot, + "FORWARD_SNAPSHOT_CACHE_DIR", + tmp_path / "forward_snapshots", + ) + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + outer_record = await registry.register_forward_reference( + "group:10001", + "outer-forward", + ) + seen_ids: list[str] = [] + + async def _get_forward(forward_id: str) -> list[dict[str, Any]]: + seen_ids.append(forward_id) + if forward_id == "outer-forward": + return [ + { + "sender": {"nickname": "Alice", "user_id": 123}, + "message": [ + {"type": "text", "data": {"text": "外层"}}, + {"type": "forward", "data": {"id": "nested-forward"}}, + ], + } + ] + if forward_id == "nested-forward" and seen_ids.count("nested-forward") == 1: + return [ + { + "sender": {"nickname": "Bob", "user_id": 456}, + "message": [ + {"type": "text", "data": {"text": "内层内容"}}, + ], + } + ] + return [] + + context = { + "attachment_registry": registry, + "get_forward_msg_callback": _get_forward, + "group_id": 10001, + "request_type": "group", + } + + outer_result = await execute({"message_id": outer_record.uid}, context) + + assert "外层" in outer_result + assert "nested-forward" in seen_ids + nested_records = [ + record + for record in registry._records.values() + if record.media_type == "forward" and record.source_ref == "nested-forward" + ] + assert len(nested_records) == 1 + + nested_result = await execute({"message_id": nested_records[0].uid}, context) + + assert "来自本地快照" in nested_result + assert "内层内容" in nested_result + assert seen_ids.count("nested-forward") == 1 + + raw_nested_result = await execute({"message_id": "nested-forward"}, context) + + assert "来自本地快照" in raw_nested_result + assert "内层内容" in raw_nested_result + assert seen_ids.count("nested-forward") == 1 + + +@pytest.mark.asyncio +async def test_get_forward_msg_reports_unavailable_forward_metadata( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + forward_snapshot, + "FORWARD_SNAPSHOT_CACHE_DIR", + tmp_path / "forward_snapshots", + ) + registry = AttachmentRegistry( + registry_path=tmp_path / "attachment_registry.json", + cache_dir=tmp_path / "attachments", + ) + record = await registry.register_forward_reference( + "group:10001", + "nested-forward", + segment_data={"id": "nested-forward", "resid": "resid-forward"}, + ) + + async def _get_forward(_forward_id: str) -> list[dict[str, Any]]: + return [] + + result = await execute( + {"message_id": record.uid}, + { + "attachment_registry": registry, + "get_forward_msg_callback": _get_forward, + "group_id": 10001, + "request_type": "group", + }, + ) + + assert "协议端当前无法回源该层合并转发" in result + assert "源 ID: nested-forward" in result + assert "原始字段:" in result + assert "resid=resid-forward" in result diff --git a/tests/test_prompt_builder_cognitive_query.py b/tests/test_prompt_builder_cognitive_query.py index 1f4de7ab..2c272618 100644 --- a/tests/test_prompt_builder_cognitive_query.py +++ b/tests/test_prompt_builder_cognitive_query.py @@ -171,3 +171,24 @@ def test_drop_current_message_if_duplicated_removes_whole_current_batch_tail() - filtered = drop_current_message_if_duplicated(recent_messages, question) assert [msg["message"] for msg in filtered] == ["保留的历史消息"] + + +def test_drop_current_message_if_duplicated_matches_message_id_before_content() -> None: + recent_messages = [ + { + "type": "group", + "message_id": "7654205319537693084", + "display_name": "测试用户", + "user_id": "10001", + "chat_id": "20001", + "timestamp": "2026-06-22 19:35:55", + "message": "[合并转发展开: 7654205319537693084]\n外层第一条内容", + } + ] + question = """ + +""" + + filtered = drop_current_message_if_duplicated(recent_messages, question) + + assert filtered == [] diff --git a/uv.lock b/uv.lock index b3cda040..3c600da3 100644 --- a/uv.lock +++ b/uv.lock @@ -4626,7 +4626,7 @@ wheels = [ [[package]] name = "undefined-bot" -version = "3.6.2" +version = "3.6.3" source = { editable = "." } dependencies = [ { name = "aiofiles" },