From 6b4e7fef8c29270dcf5b1079e0ae57994d80ef1d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 18:27:38 +0000 Subject: [PATCH 01/18] Implement persistent image cache with ImageCacheManager class Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/bd6001d7-aeef-4786-a5ec-50b8ef63f22d Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- main.py | 20 +++- utils/__init__.py | 2 + utils/image_cache.py | 260 +++++++++++++++++++++++++++++++++++++++++ utils/image_caption.py | 16 +-- 4 files changed, 289 insertions(+), 9 deletions(-) create mode 100644 utils/image_cache.py diff --git a/main.py b/main.py index 7a4bd43..3fe3ad0 100644 --- a/main.py +++ b/main.py @@ -127,7 +127,8 @@ async def help(self, event: AstrMessageEvent): " 你也可以重置指定群聊天记录 如/sc reset 群号\n" "使用history指令可以查看最近聊天记录 如/sc history\n" "使用mute/闭嘴指令临时禁用自动回复 如/sc mute 5 或 /sc 闭嘴 10\n" - "使用unmute/说话指令解除禁用 如/sc unmute 或 /sc 说话" + "使用unmute/说话指令解除禁用 如/sc unmute 或 /sc 说话\n" + "使用clear_image_cache/清除图片缓存指令清除图片转述缓存 如/sc clear_image_cache" ) platform_name = event.get_platform_name() if platform_name in ("qq_official", "qq_official_webhook"): @@ -286,4 +287,19 @@ async def callllm(self, event: AstrMessageEvent): yield await LLMUtils.call_llm(event, self.config, self.context) except Exception as e: logger.error(f"调用大模型时发生错误: {e}") - yield event.plain_result(f"触发大模型回复失败喵:{str(e)}") \ No newline at end of file + yield event.plain_result(f"触发大模型回复失败喵:{str(e)}") + + @filter.permission_type(filter.PermissionType.ADMIN) + @spectrecore.command("clear_image_cache", alias=['清除图片缓存']) + async def clear_image_cache(self, event: AstrMessageEvent): + """清除图片转述缓存喵""" + try: + from .utils import ImageCacheManager + success = ImageCacheManager.clear() + if success: + yield event.plain_result("已成功清除图片转述缓存喵~") + else: + yield event.plain_result("清除图片转述缓存失败喵,可能发生错误") + except Exception as e: + logger.error(f"清除图片缓存时发生错误: {e}") + yield event.plain_result(f"清除图片缓存失败喵:{str(e)}") \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py index 5609632..460c44b 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -5,6 +5,7 @@ from .history_storage import HistoryStorage from .message_utils import MessageUtils from .image_caption import ImageCaptionUtils +from .image_cache import ImageCacheManager from .llm_utils import LLMUtils from .persona_utils import PersonaUtils from .text_filter import TextFilter @@ -14,6 +15,7 @@ "HistoryStorage", "MessageUtils", "ImageCaptionUtils", + "ImageCacheManager", "LLMUtils", "PersonaUtils", "TextFilter", diff --git a/utils/image_cache.py b/utils/image_cache.py new file mode 100644 index 0000000..ffb4b25 --- /dev/null +++ b/utils/image_cache.py @@ -0,0 +1,260 @@ +import os +import json +import time +import hashlib +import traceback +from typing import Optional, Dict +from astrbot.api.all import * + + +class ImageCacheManager: + """ + 图片转述缓存管理器 + + 用于持久化存储图片转述缓存,避免重复的图片转述请求 + """ + + # 保存配置对象的静态变量 + config: Optional[AstrBotConfig] = None + # 基础存储路径 + base_storage_path: Optional[str] = None + # 内存缓存(用于快速查询) + memory_cache: Dict[str, tuple[str, float]] = {} + + @staticmethod + def init(config: AstrBotConfig): + """ + 初始化图片缓存管理器,保存config引用 + + Args: + config: AstrBotConfig 对象 + """ + ImageCacheManager.config = config + # 初始化基础存储路径 + from astrbot.core.utils.astrbot_path import get_astrbot_data_path + astrbot_data_path = get_astrbot_data_path() + ImageCacheManager.base_storage_path = os.path.join(astrbot_data_path, "data", "image_caption_cache") + ImageCacheManager._ensure_dir(ImageCacheManager.base_storage_path) + logger.info(f"图片缓存存储路径初始化: {ImageCacheManager.base_storage_path}") + + # 加载现有的缓存到内存 + ImageCacheManager._load_cache_from_disk() + + @staticmethod + def _ensure_dir(directory: str) -> None: + """确保目录存在,不存在则创建""" + if not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + + @staticmethod + def _get_cache_file_path() -> str: + """获取缓存文件路径""" + if not ImageCacheManager.base_storage_path: + from astrbot.core.utils.astrbot_path import get_astrbot_data_path + astrbot_data_path = get_astrbot_data_path() + ImageCacheManager.base_storage_path = os.path.join(astrbot_data_path, "data", "image_caption_cache") + ImageCacheManager._ensure_dir(ImageCacheManager.base_storage_path) + + return os.path.join(ImageCacheManager.base_storage_path, "caption_cache.json") + + @staticmethod + def _generate_image_hash(image: str) -> str: + """ + 为图片生成哈希值(用于作为缓存键) + + 使用 SHA256 生成固定长度的哈希,避免过长的键名 + + Args: + image: 图片的base64编码或URL + + Returns: + 图片的哈希值 + """ + return hashlib.sha256(image.encode('utf-8')).hexdigest() + + @staticmethod + def _load_cache_from_disk() -> None: + """从磁盘加载缓存到内存""" + try: + cache_file = ImageCacheManager._get_cache_file_path() + + if not os.path.exists(cache_file): + logger.debug("缓存文件不存在,跳过加载") + return + + with open(cache_file, "r", encoding="utf-8") as f: + cache_data = json.load(f) + + # 加载缓存到内存 + if isinstance(cache_data, dict): + ImageCacheManager.memory_cache = cache_data + logger.info(f"成功从磁盘加载 {len(cache_data)} 条图片缓存") + else: + logger.warning(f"缓存文件格式不正确,跳过加载") + + except Exception as e: + logger.error(f"从磁盘加载缓存失败: {e}") + logger.debug(traceback.format_exc()) + + @staticmethod + def _save_cache_to_disk() -> None: + """将内存缓存保存到磁盘""" + try: + cache_file = ImageCacheManager._get_cache_file_path() + + # 确保父目录存在 + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + + # 转换内存缓存格式为可序列化的格式 + serializable_cache = {} + for key, value in ImageCacheManager.memory_cache.items(): + if isinstance(value, tuple) and len(value) == 2: + caption, timestamp = value + serializable_cache[key] = [caption, timestamp] + else: + serializable_cache[key] = value + + with open(cache_file, "w", encoding="utf-8") as f: + json.dump(serializable_cache, f, ensure_ascii=False, indent=2) + + logger.debug(f"成功保存 {len(ImageCacheManager.memory_cache)} 条图片缓存到磁盘") + + except Exception as e: + logger.error(f"保存缓存到磁盘失败: {e}") + logger.debug(traceback.format_exc()) + + @staticmethod + def get(image: str) -> Optional[str]: + """ + 获取缓存的图片转述 + + Args: + image: 图片的base64编码或URL + + Returns: + 缓存的转述文本,如果不存在则返回None + """ + try: + image_hash = ImageCacheManager._generate_image_hash(image) + + if image_hash in ImageCacheManager.memory_cache: + cached_data = ImageCacheManager.memory_cache[image_hash] + + # 处理缓存数据格式 + if isinstance(cached_data, tuple) and len(cached_data) >= 1: + caption = cached_data[0] + elif isinstance(cached_data, list) and len(cached_data) >= 1: + caption = cached_data[0] + else: + caption = cached_data + + logger.debug(f"命中图片描述缓存: {image[:50]}...") + return caption + + return None + + except Exception as e: + logger.error(f"获取缓存失败: {e}") + return None + + @staticmethod + def set(image: str, caption: str) -> bool: + """ + 存储图片转述到缓存 + + Args: + image: 图片的base64编码或URL + caption: 图片的转述文本 + + Returns: + 是否存储成功 + """ + try: + image_hash = ImageCacheManager._generate_image_hash(image) + + # 存储为元组 (caption, timestamp) 用于后续清理 + ImageCacheManager.memory_cache[image_hash] = (caption, time.time()) + + # 异步保存到磁盘(通过随机概率减少I/O) + import random + if random.random() < 0.2: # 20% 的概率保存一次 + ImageCacheManager._save_cache_to_disk() + + logger.debug(f"缓存图片描述: {image[:50]}... -> {caption}") + return True + + except Exception as e: + logger.error(f"存储缓存失败: {e}") + return False + + @staticmethod + def clear() -> bool: + """ + 清空所有缓存 + + Returns: + 是否清空成功 + """ + try: + ImageCacheManager.memory_cache.clear() + + cache_file = ImageCacheManager._get_cache_file_path() + if os.path.exists(cache_file): + os.remove(cache_file) + + logger.info("已清空所有图片缓存") + return True + + except Exception as e: + logger.error(f"清空缓存失败: {e}") + return False + + @staticmethod + def cleanup_old_entries() -> None: + """ + 清理超过配置天数的缓存条目 + + 防止缓存无限增长 + """ + try: + if not ImageCacheManager.config: + logger.debug("配置未初始化,跳过缓存清理") + return + + image_processing_config = ImageCacheManager.config.get("image_processing", {}) + retention_days = image_processing_config.get("image_retention_days", 7) + + if retention_days < 1 or retention_days > 365: + logger.warning(f"图片保留天数配置无效: {retention_days},使用默认值7天") + retention_days = 7 + + current_time = time.time() + cleanup_threshold = retention_days * 24 * 3600 # 配置的天数转换为秒 + removed_count = 0 + + keys_to_remove = [] + for key, value in ImageCacheManager.memory_cache.items(): + if isinstance(value, tuple) and len(value) == 2: + caption, timestamp = value + if current_time - timestamp > cleanup_threshold: + keys_to_remove.append(key) + removed_count += 1 + + for key in keys_to_remove: + del ImageCacheManager.memory_cache[key] + + if removed_count > 0: + logger.info(f"清理过期缓存完成,清理了 {removed_count} 条超过 {retention_days} 天的缓存条目") + # 清理后保存一次 + ImageCacheManager._save_cache_to_disk() + + except Exception as e: + logger.error(f"清理缓存时发生错误: {e}") + + @staticmethod + def force_save() -> None: + """强制将缓存保存到磁盘(用于关闭前调用)""" + try: + ImageCacheManager._save_cache_to_disk() + except Exception as e: + logger.error(f"强制保存缓存失败: {e}") diff --git a/utils/image_caption.py b/utils/image_caption.py index d3f3d1d..c44bbb2 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -1,6 +1,7 @@ from astrbot.api.all import * from typing import Optional import asyncio +from .image_cache import ImageCacheManager class ImageCaptionUtils: """ @@ -12,14 +13,14 @@ class ImageCaptionUtils: # 保存context和config对象的静态变量 context: Optional[Context] = None config: Optional[AstrBotConfig] = None - # 图片描述缓存 - caption_cache: dict[str, str] = {} @staticmethod def init(context: Context, config: AstrBotConfig): """初始化图片转述工具类,保存context和config引用""" ImageCaptionUtils.context = context ImageCaptionUtils.config = config + # 初始化图片缓存管理器 + ImageCacheManager.init(config) @staticmethod async def generate_image_caption( @@ -38,10 +39,11 @@ async def generate_image_caption( Returns: 生成的图片描述文本,如果失败则返回None """ - # 检查缓存 - if image in ImageCaptionUtils.caption_cache: + # 检查持久化缓存 + cached_caption = ImageCacheManager.get(image) + if cached_caption is not None: logger.debug(f"命中图片描述缓存: {image[:50]}...") - return ImageCaptionUtils.caption_cache[image] + return cached_caption # 获取配置 config = ImageCaptionUtils.config @@ -83,9 +85,9 @@ async def call_llm(): llm_response = await asyncio.wait_for(call_llm(), timeout=timeout) caption = llm_response.completion_text - # 缓存结果 + # 缓存结果到持久化缓存 if caption: - ImageCaptionUtils.caption_cache[image] = caption + ImageCacheManager.set(image, caption) logger.debug(f"缓存图片描述: {image[:50]}... -> {caption}") return caption From 84d5e9093c162add2c2cfaf9d4d414c3a23c3a3d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 18:36:17 +0000 Subject: [PATCH 02/18] Fix final code quality issues in image cache persistence Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/bd6001d7-aeef-4786-a5ec-50b8ef63f22d Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_cache.py | 108 ++++++++++++++++++++++++++++++----------- utils/image_caption.py | 2 +- 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/utils/image_cache.py b/utils/image_cache.py index ffb4b25..8f90c82 100644 --- a/utils/image_cache.py +++ b/utils/image_cache.py @@ -14,12 +14,21 @@ class ImageCacheManager: 用于持久化存储图片转述缓存,避免重复的图片转述请求 """ + # 常量定义 + MAX_RETENTION_DAYS = 365 + DEFAULT_RETENTION_DAYS = 7 + HOURS_PER_DAY = 24 + SECONDS_PER_HOUR = 3600 + WRITE_THRESHOLD = 10 # 每10次写入保存一次 + # 保存配置对象的静态变量 config: Optional[AstrBotConfig] = None # 基础存储路径 base_storage_path: Optional[str] = None # 内存缓存(用于快速查询) memory_cache: Dict[str, tuple[str, float]] = {} + # 记录写入次数,用于周期性保存 + write_count: int = 0 @staticmethod def init(config: AstrBotConfig): @@ -30,6 +39,8 @@ def init(config: AstrBotConfig): config: AstrBotConfig 对象 """ ImageCacheManager.config = config + ImageCacheManager.write_count = 0 # 重置写入计数 + ImageCacheManager.memory_cache.clear() # 清空内存缓存,确保从磁盘重新加载 # 初始化基础存储路径 from astrbot.core.utils.astrbot_path import get_astrbot_data_path astrbot_data_path = get_astrbot_data_path() @@ -85,10 +96,24 @@ def _load_cache_from_disk() -> None: with open(cache_file, "r", encoding="utf-8") as f: cache_data = json.load(f) - # 加载缓存到内存 + # 加载缓存到内存,统一转换为元组格式,使用严格验证 if isinstance(cache_data, dict): - ImageCacheManager.memory_cache = cache_data - logger.info(f"成功从磁盘加载 {len(cache_data)} 条图片缓存") + for key, value in cache_data.items(): + try: + # 要求恰好2个元素 + if isinstance(value, (list, tuple)) and len(value) == 2: + caption, timestamp = value[0], value[1] + # 验证类型 + if isinstance(caption, str) and isinstance(timestamp, (int, float)): + ImageCacheManager.memory_cache[key] = (caption, timestamp) + else: + logger.warning(f"缓存条目类型不正确,跳过: {key}") + else: + logger.warning(f"缓存条目格式不正确,跳过: {key}") + except Exception as e: + logger.warning(f"加载缓存条目失败 {key}: {e}") + + logger.info(f"成功从磁盘加载 {len(ImageCacheManager.memory_cache)} 条图片缓存") else: logger.warning(f"缓存文件格式不正确,跳过加载") @@ -105,19 +130,29 @@ def _save_cache_to_disk() -> None: # 确保父目录存在 os.makedirs(os.path.dirname(cache_file), exist_ok=True) - # 转换内存缓存格式为可序列化的格式 + # 转换内存缓存格式为可序列化的格式,并验证条目 serializable_cache = {} + skipped_count = 0 for key, value in ImageCacheManager.memory_cache.items(): if isinstance(value, tuple) and len(value) == 2: caption, timestamp = value - serializable_cache[key] = [caption, timestamp] + # 验证条目有效性 + if isinstance(caption, str) and isinstance(timestamp, (int, float)): + serializable_cache[key] = [caption, timestamp] + else: + skipped_count += 1 + logger.debug(f"跳过格式不正确的缓存条目: {key}") else: - serializable_cache[key] = value + skipped_count += 1 + logger.debug(f"跳过格式不正确的缓存条目: {key}") with open(cache_file, "w", encoding="utf-8") as f: json.dump(serializable_cache, f, ensure_ascii=False, indent=2) - logger.debug(f"成功保存 {len(ImageCacheManager.memory_cache)} 条图片缓存到磁盘") + if skipped_count > 0: + logger.debug(f"成功保存 {len(serializable_cache)} 条有效缓存到磁盘,跳过 {skipped_count} 条格式不正确的条目") + else: + logger.debug(f"成功保存 {len(serializable_cache)} 条图片缓存到磁盘") except Exception as e: logger.error(f"保存缓存到磁盘失败: {e}") @@ -140,15 +175,20 @@ def get(image: str) -> Optional[str]: if image_hash in ImageCacheManager.memory_cache: cached_data = ImageCacheManager.memory_cache[image_hash] - # 处理缓存数据格式 - if isinstance(cached_data, tuple) and len(cached_data) >= 1: - caption = cached_data[0] - elif isinstance(cached_data, list) and len(cached_data) >= 1: - caption = cached_data[0] + # 统一处理缓存数据格式(要求严格的tuple/list格式,恰好包含2个元素) + if isinstance(cached_data, (tuple, list)) and len(cached_data) == 2: + caption, timestamp = cached_data[0], cached_data[1] + # 验证提取的值类型 + if not isinstance(caption, str): + logger.warning(f"缓存条目格式不正确,期望字符串但获得 {type(caption).__name__}") + return None + if not isinstance(timestamp, (int, float)): + logger.warning(f"缓存条目时间戳格式不正确,期望数字但获得 {type(timestamp).__name__}") + return None else: - caption = cached_data + logger.warning(f"缓存条目格式不正确,期望恰好2个元素的tuple/list但获得 {type(cached_data).__name__}") + return None - logger.debug(f"命中图片描述缓存: {image[:50]}...") return caption return None @@ -175,12 +215,13 @@ def set(image: str, caption: str) -> bool: # 存储为元组 (caption, timestamp) 用于后续清理 ImageCacheManager.memory_cache[image_hash] = (caption, time.time()) - # 异步保存到磁盘(通过随机概率减少I/O) - import random - if random.random() < 0.2: # 20% 的概率保存一次 + # 基于阈值的周期性保存(更稳定,避免过度I/O) + ImageCacheManager.write_count += 1 + if ImageCacheManager.write_count >= ImageCacheManager.WRITE_THRESHOLD: ImageCacheManager._save_cache_to_disk() + ImageCacheManager.write_count = 0 - logger.debug(f"缓存图片描述: {image[:50]}... -> {caption}") + logger.debug(f"缓存图片描述: {image[:50]}...") return True except Exception as e: @@ -197,6 +238,7 @@ def clear() -> bool: """ try: ImageCacheManager.memory_cache.clear() + ImageCacheManager.write_count = 0 cache_file = ImageCacheManager._get_cache_file_path() if os.path.exists(cache_file): @@ -222,23 +264,33 @@ def cleanup_old_entries() -> None: return image_processing_config = ImageCacheManager.config.get("image_processing", {}) - retention_days = image_processing_config.get("image_retention_days", 7) + retention_days = image_processing_config.get("image_retention_days", ImageCacheManager.DEFAULT_RETENTION_DAYS) - if retention_days < 1 or retention_days > 365: - logger.warning(f"图片保留天数配置无效: {retention_days},使用默认值7天") - retention_days = 7 + # 验证配置值有效性 + if retention_days < 1 or retention_days > ImageCacheManager.MAX_RETENTION_DAYS: + logger.warning(f"图片保留天数配置无效: {retention_days},使用默认值{ImageCacheManager.DEFAULT_RETENTION_DAYS}天") + retention_days = ImageCacheManager.DEFAULT_RETENTION_DAYS current_time = time.time() - cleanup_threshold = retention_days * 24 * 3600 # 配置的天数转换为秒 + cleanup_threshold = retention_days * ImageCacheManager.HOURS_PER_DAY * ImageCacheManager.SECONDS_PER_HOUR removed_count = 0 keys_to_remove = [] for key, value in ImageCacheManager.memory_cache.items(): - if isinstance(value, tuple) and len(value) == 2: - caption, timestamp = value - if current_time - timestamp > cleanup_threshold: - keys_to_remove.append(key) - removed_count += 1 + # 统一处理所有格式的缓存条目,要求恰好2个元素 + timestamp = None + if isinstance(value, (tuple, list)) and len(value) == 2: + timestamp = value[1] + + # 如果没有有效的时间戳,视为损坏的条目,标记删除 + if timestamp is None: + keys_to_remove.append(key) + removed_count += 1 + logger.debug(f"删除时间戳无效的缓存条目: {key}") + # 检查是否超过保留期限 + elif current_time - timestamp > cleanup_threshold: + keys_to_remove.append(key) + removed_count += 1 for key in keys_to_remove: del ImageCacheManager.memory_cache[key] diff --git a/utils/image_caption.py b/utils/image_caption.py index c44bbb2..66fae94 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -88,7 +88,7 @@ async def call_llm(): # 缓存结果到持久化缓存 if caption: ImageCacheManager.set(image, caption) - logger.debug(f"缓存图片描述: {image[:50]}... -> {caption}") + logger.debug(f"缓存到持久化存储: {image[:50]}...") return caption except asyncio.TimeoutError: From 8901ffdb1fb1636185696661516bc8525e9832bb Mon Sep 17 00:00:00 2001 From: nbnbnd Date: Sat, 16 May 2026 03:38:28 +0800 Subject: [PATCH 03/18] Remove history length limit trimming in save_message --- utils/history_storage.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/utils/history_storage.py b/utils/history_storage.py index 173f6bb..8e6a231 100644 --- a/utils/history_storage.py +++ b/utils/history_storage.py @@ -113,10 +113,6 @@ async def save_message(message: AstrBotMessage, platform_name: str, chat_id_over sanitized_message = HistoryStorage._sanitize_message(message) history.append(sanitized_message) - # 限制历史记录数量 - if len(history) > 200: - history = history[-200:] - # 确保父目录存在 os.makedirs(os.path.dirname(file_path), exist_ok=True) @@ -463,4 +459,3 @@ def _cleanup_old_images() -> None: except Exception as e: logger.error(f"清理图片文件时发生错误: {e}") - From f3b488dfaa5da35b462f87a0851fd68f5bd0f02d Mon Sep 17 00:00:00 2001 From: nbnbnd Date: Sat, 16 May 2026 04:05:41 +0800 Subject: [PATCH 04/18] Remove parenthetical content from hint strings in _conf_schema.json --- _conf_schema.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_conf_schema.json b/_conf_schema.json index 3a1557a..e9fc26c 100644 --- a/_conf_schema.json +++ b/_conf_schema.json @@ -2,7 +2,7 @@ "group_msg_history": { "description": "输入给大模型的消息数量", "type": "int", - "hint": "决定了会输入给大模型多少条q群历史消息(最多200条)", + "hint": "决定了会输入给大模型多少条q群历史消息", "default": 20 }, "enable_all_groups": { @@ -93,7 +93,7 @@ "image_count":{ "description":"直接输入给大模型的图片数量", "type":"int", - "hint":"决定了会直接输入给大模型多少张图片,仅限支持图片输入的多模态模型可用(和下面的图像转述功能可以同时开启,最近的指定数量图片会直接输入)", + "hint":"决定了会直接输入给大模型多少张图片,仅限支持图片输入的多模态模型可用", "default":0 }, "use_image_caption": { @@ -128,4 +128,4 @@ } } } -} \ No newline at end of file +} From 3ec5ac2abc2661291628819e2c46a010ce35b166 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 04:24:49 +0000 Subject: [PATCH 05/18] Initial plan From 4010a1d6412290ca6327cccf169e0339e9e7c3c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 04:26:49 +0000 Subject: [PATCH 06/18] feat: skip repeated failed image caption attempts Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/43e34f76-d794-45dc-aa92-1def408b57cf Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_cache.py | 140 +++++++++++++++++++++++++++++++++++++++-- utils/image_caption.py | 22 ++++++- utils/message_utils.py | 21 ++++++- 3 files changed, 175 insertions(+), 8 deletions(-) diff --git a/utils/image_cache.py b/utils/image_cache.py index 8f90c82..c0f6242 100644 --- a/utils/image_cache.py +++ b/utils/image_cache.py @@ -27,6 +27,8 @@ class ImageCacheManager: base_storage_path: Optional[str] = None # 内存缓存(用于快速查询) memory_cache: Dict[str, tuple[str, float]] = {} + # 失败记录缓存(hash -> failure_timestamp) + failure_cache: Dict[str, float] = {} # 记录写入次数,用于周期性保存 write_count: int = 0 @@ -41,6 +43,7 @@ def init(config: AstrBotConfig): ImageCacheManager.config = config ImageCacheManager.write_count = 0 # 重置写入计数 ImageCacheManager.memory_cache.clear() # 清空内存缓存,确保从磁盘重新加载 + ImageCacheManager.failure_cache.clear() # 清空失败缓存,确保从磁盘重新加载 # 初始化基础存储路径 from astrbot.core.utils.astrbot_path import get_astrbot_data_path astrbot_data_path = get_astrbot_data_path() @@ -96,9 +99,18 @@ def _load_cache_from_disk() -> None: with open(cache_file, "r", encoding="utf-8") as f: cache_data = json.load(f) - # 加载缓存到内存,统一转换为元组格式,使用严格验证 - if isinstance(cache_data, dict): - for key, value in cache_data.items(): + # 兼容两种格式: + # 1) 旧格式: {hash: [caption, timestamp]} + # 2) 新格式: {"captions": {...}, "failures": {...}} + caption_data = cache_data + failure_data = {} + if isinstance(cache_data, dict) and ("captions" in cache_data or "failures" in cache_data): + caption_data = cache_data.get("captions", {}) + failure_data = cache_data.get("failures", {}) + + # 加载成功缓存到内存 + if isinstance(caption_data, dict): + for key, value in caption_data.items(): try: # 要求恰好2个元素 if isinstance(value, (list, tuple)) and len(value) == 2: @@ -115,8 +127,18 @@ def _load_cache_from_disk() -> None: logger.info(f"成功从磁盘加载 {len(ImageCacheManager.memory_cache)} 条图片缓存") else: - logger.warning(f"缓存文件格式不正确,跳过加载") - + logger.warning(f"图片缓存数据格式不正确,跳过加载") + + # 加载失败缓存到内存 + if isinstance(failure_data, dict): + for key, value in failure_data.items(): + if isinstance(value, (int, float)): + ImageCacheManager.failure_cache[key] = float(value) + else: + logger.warning(f"失败缓存条目格式不正确,跳过: {key}") + elif failure_data: + logger.warning("失败缓存数据格式不正确,跳过加载") + except Exception as e: logger.error(f"从磁盘加载缓存失败: {e}") logger.debug(traceback.format_exc()) @@ -146,8 +168,23 @@ def _save_cache_to_disk() -> None: skipped_count += 1 logger.debug(f"跳过格式不正确的缓存条目: {key}") + serializable_failures = {} + for key, value in ImageCacheManager.failure_cache.items(): + if isinstance(value, (int, float)): + serializable_failures[key] = float(value) + else: + logger.debug(f"跳过格式不正确的失败缓存条目: {key}") + with open(cache_file, "w", encoding="utf-8") as f: - json.dump(serializable_cache, f, ensure_ascii=False, indent=2) + json.dump( + { + "captions": serializable_cache, + "failures": serializable_failures + }, + f, + ensure_ascii=False, + indent=2 + ) if skipped_count > 0: logger.debug(f"成功保存 {len(serializable_cache)} 条有效缓存到磁盘,跳过 {skipped_count} 条格式不正确的条目") @@ -238,6 +275,7 @@ def clear() -> bool: """ try: ImageCacheManager.memory_cache.clear() + ImageCacheManager.failure_cache.clear() ImageCacheManager.write_count = 0 cache_file = ImageCacheManager._get_cache_file_path() @@ -294,6 +332,18 @@ def cleanup_old_entries() -> None: for key in keys_to_remove: del ImageCacheManager.memory_cache[key] + + failure_keys_to_remove = [] + for key, timestamp in ImageCacheManager.failure_cache.items(): + if not isinstance(timestamp, (int, float)): + failure_keys_to_remove.append(key) + removed_count += 1 + elif current_time - timestamp > cleanup_threshold: + failure_keys_to_remove.append(key) + removed_count += 1 + + for key in failure_keys_to_remove: + del ImageCacheManager.failure_cache[key] if removed_count > 0: logger.info(f"清理过期缓存完成,清理了 {removed_count} 条超过 {retention_days} 天的缓存条目") @@ -310,3 +360,81 @@ def force_save() -> None: ImageCacheManager._save_cache_to_disk() except Exception as e: logger.error(f"强制保存缓存失败: {e}") + + @staticmethod + def get_failed_timestamp(image: str) -> Optional[float]: + """ + 获取图片最近一次转述失败时间戳 + """ + try: + image_hash = ImageCacheManager._generate_image_hash(image) + timestamp = ImageCacheManager.failure_cache.get(image_hash) + if isinstance(timestamp, (int, float)): + return float(timestamp) + return None + except Exception as e: + logger.error(f"获取失败记录失败: {e}") + return None + + @staticmethod + def is_failed(image: str) -> bool: + """ + 判断图片是否有失败记录 + """ + return ImageCacheManager.get_failed_timestamp(image) is not None + + @staticmethod + def set_failed(image: str) -> bool: + """ + 记录图片转述失败 + """ + try: + image_hash = ImageCacheManager._generate_image_hash(image) + ImageCacheManager.failure_cache[image_hash] = time.time() + + ImageCacheManager.write_count += 1 + if ImageCacheManager.write_count >= ImageCacheManager.WRITE_THRESHOLD: + ImageCacheManager._save_cache_to_disk() + ImageCacheManager.write_count = 0 + + return True + except Exception as e: + logger.error(f"记录失败缓存失败: {e}") + return False + + @staticmethod + def clear_failed(image: str) -> bool: + """ + 清理图片失败记录 + """ + try: + image_hash = ImageCacheManager._generate_image_hash(image) + if image_hash in ImageCacheManager.failure_cache: + del ImageCacheManager.failure_cache[image_hash] + + ImageCacheManager.write_count += 1 + if ImageCacheManager.write_count >= ImageCacheManager.WRITE_THRESHOLD: + ImageCacheManager._save_cache_to_disk() + ImageCacheManager.write_count = 0 + + return True + except Exception as e: + logger.error(f"清理失败缓存失败: {e}") + return False + + @staticmethod + def should_skip_failed_image(image: str, latest_success_timestamp: Optional[float], window_seconds: int) -> bool: + """ + 判断失败图片是否应跳过转述: + - 存在失败记录 + - 失败时间早于最近成功时间 + - 且二者间隔在窗口时间内 + """ + if latest_success_timestamp is None or window_seconds <= 0: + return False + + failed_timestamp = ImageCacheManager.get_failed_timestamp(image) + if failed_timestamp is None: + return False + + return failed_timestamp <= latest_success_timestamp and (latest_success_timestamp - failed_timestamp) <= window_seconds diff --git a/utils/image_caption.py b/utils/image_caption.py index 66fae94..5c5128a 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -13,6 +13,7 @@ class ImageCaptionUtils: # 保存context和config对象的静态变量 context: Optional[Context] = None config: Optional[AstrBotConfig] = None + DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS = 300 @staticmethod def init(context: Context, config: AstrBotConfig): @@ -26,7 +27,8 @@ def init(context: Context, config: AstrBotConfig): async def generate_image_caption( image: str, # 图片的base64编码或URL umo: Optional[str] = None, # unified_msg_origin,用于 UMO 路由 - timeout: int = 30 + timeout: int = 30, + latest_success_timestamp: Optional[float] = None ) -> Optional[str]: """ 为单张图片生成文字描述 @@ -35,6 +37,7 @@ async def generate_image_caption( image: 图片的base64编码或URL umo: unified_msg_origin,用于获取对应 UMO 的 provider timeout: 超时时间(秒) + latest_success_timestamp: 最近一次成功转述时间戳(用于失败图片跳过策略) Returns: 生成的图片描述文本,如果失败则返回None @@ -42,6 +45,7 @@ async def generate_image_caption( # 检查持久化缓存 cached_caption = ImageCacheManager.get(image) if cached_caption is not None: + ImageCacheManager.clear_failed(image) logger.debug(f"命中图片描述缓存: {image[:50]}...") return cached_caption @@ -58,6 +62,17 @@ async def generate_image_caption( if not image_processing_config.get("use_image_caption", False): return None + skip_window_seconds = image_processing_config.get( + "failed_image_skip_window_seconds", + ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + ) + if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: + skip_window_seconds = ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + + if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): + logger.debug(f"跳过失败图片转述(失败记录在最近成功转述之前): {image[:50]}...") + return None + provider_id = image_processing_config.get("image_caption_provider_id", "") # 获取提供商,支持 UMO 路由 if provider_id == "": @@ -88,12 +103,17 @@ async def call_llm(): # 缓存结果到持久化缓存 if caption: ImageCacheManager.set(image, caption) + ImageCacheManager.clear_failed(image) logger.debug(f"缓存到持久化存储: {image[:50]}...") + else: + ImageCacheManager.set_failed(image) return caption except asyncio.TimeoutError: logger.warning(f"图片转述超时,超过了{timeout}秒") + ImageCacheManager.set_failed(image) return None except Exception as e: logger.error(f"图片转述失败: {e}") + ImageCacheManager.set_failed(image) return None diff --git a/utils/message_utils.py b/utils/message_utils.py index d66dc74..0098a56 100644 --- a/utils/message_utils.py +++ b/utils/message_utils.py @@ -4,6 +4,7 @@ import time from datetime import datetime from .image_caption import ImageCaptionUtils +from .image_cache import ImageCacheManager import asyncio import json import traceback @@ -82,6 +83,7 @@ async def outline_message_list(message_list: List[BaseMessageComponent], umo: Op umo: unified_msg_origin,用于 UMO 路由 """ outline = "" + latest_success_timestamp: Optional[float] = None for i in message_list: try: # 获取组件类型 @@ -110,9 +112,26 @@ async def outline_message_list(message_list: List[BaseMessageComponent], umo: Op continue image = image_path - caption = await ImageCaptionUtils.generate_image_caption(image, umo=umo) + image_processing_config = ImageCaptionUtils.config.get("image_processing", {}) if ImageCaptionUtils.config else {} + skip_window_seconds = image_processing_config.get( + "failed_image_skip_window_seconds", + ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + ) + if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: + skip_window_seconds = ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + + if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): + outline += f"[图片]" + continue + + caption = await ImageCaptionUtils.generate_image_caption( + image, + umo=umo, + latest_success_timestamp=latest_success_timestamp + ) if caption: outline += f"[图片: {caption}]" + latest_success_timestamp = time.time() else: outline += f"[图片]" else: From 240e5f8691ae76bddab626217e4dcc2943ffe89a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 04:27:55 +0000 Subject: [PATCH 07/18] refactor: deduplicate failed-image skip checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/43e34f76-d794-45dc-aa92-1def408b57cf Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_cache.py | 10 +++++++++- utils/image_caption.py | 25 +++++++++++++++++++------ utils/message_utils.py | 13 ------------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/utils/image_cache.py b/utils/image_cache.py index c0f6242..675661a 100644 --- a/utils/image_cache.py +++ b/utils/image_cache.py @@ -429,6 +429,14 @@ def should_skip_failed_image(image: str, latest_success_timestamp: Optional[floa - 存在失败记录 - 失败时间早于最近成功时间 - 且二者间隔在窗口时间内 + + Args: + image: 图片的base64编码或URL + latest_success_timestamp: 最近一次成功转述的时间戳 + window_seconds: 失败记录与最近成功记录可判定为“相近”的时间窗口(秒) + + Returns: + 是否应跳过该图片转述 """ if latest_success_timestamp is None or window_seconds <= 0: return False @@ -437,4 +445,4 @@ def should_skip_failed_image(image: str, latest_success_timestamp: Optional[floa if failed_timestamp is None: return False - return failed_timestamp <= latest_success_timestamp and (latest_success_timestamp - failed_timestamp) <= window_seconds + return failed_timestamp < latest_success_timestamp and (latest_success_timestamp - failed_timestamp) <= window_seconds diff --git a/utils/image_caption.py b/utils/image_caption.py index 5c5128a..ee4a1d5 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -23,6 +23,24 @@ def init(context: Context, config: AstrBotConfig): # 初始化图片缓存管理器 ImageCacheManager.init(config) + @staticmethod + def get_failed_image_skip_window_seconds() -> int: + """ + 获取失败图片跳过策略的时间窗口(秒) + """ + config = ImageCaptionUtils.config + if not config: + return ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + + image_processing_config = config.get("image_processing", {}) + skip_window_seconds = image_processing_config.get( + "failed_image_skip_window_seconds", + ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + ) + if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: + return ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + return skip_window_seconds + @staticmethod async def generate_image_caption( image: str, # 图片的base64编码或URL @@ -62,12 +80,7 @@ async def generate_image_caption( if not image_processing_config.get("use_image_caption", False): return None - skip_window_seconds = image_processing_config.get( - "failed_image_skip_window_seconds", - ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS - ) - if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: - skip_window_seconds = ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + skip_window_seconds = ImageCaptionUtils.get_failed_image_skip_window_seconds() if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): logger.debug(f"跳过失败图片转述(失败记录在最近成功转述之前): {image[:50]}...") diff --git a/utils/message_utils.py b/utils/message_utils.py index 0098a56..b7342b5 100644 --- a/utils/message_utils.py +++ b/utils/message_utils.py @@ -4,7 +4,6 @@ import time from datetime import datetime from .image_caption import ImageCaptionUtils -from .image_cache import ImageCacheManager import asyncio import json import traceback @@ -112,18 +111,6 @@ async def outline_message_list(message_list: List[BaseMessageComponent], umo: Op continue image = image_path - image_processing_config = ImageCaptionUtils.config.get("image_processing", {}) if ImageCaptionUtils.config else {} - skip_window_seconds = image_processing_config.get( - "failed_image_skip_window_seconds", - ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS - ) - if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: - skip_window_seconds = ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS - - if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): - outline += f"[图片]" - continue - caption = await ImageCaptionUtils.generate_image_caption( image, umo=umo, From 9f3e898649d13bb87d86e9106fe3a303352a9800 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 04:28:38 +0000 Subject: [PATCH 08/18] chore: clarify failed-image skip diagnostics Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/43e34f76-d794-45dc-aa92-1def408b57cf Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_cache.py | 6 +++--- utils/image_caption.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/image_cache.py b/utils/image_cache.py index 675661a..21a9558 100644 --- a/utils/image_cache.py +++ b/utils/image_cache.py @@ -127,7 +127,7 @@ def _load_cache_from_disk() -> None: logger.info(f"成功从磁盘加载 {len(ImageCacheManager.memory_cache)} 条图片缓存") else: - logger.warning(f"图片缓存数据格式不正确,跳过加载") + logger.warning(f"图片缓存数据格式不正确,期望 dict,实际为 {type(caption_data).__name__},跳过加载") # 加载失败缓存到内存 if isinstance(failure_data, dict): @@ -427,8 +427,8 @@ def should_skip_failed_image(image: str, latest_success_timestamp: Optional[floa """ 判断失败图片是否应跳过转述: - 存在失败记录 - - 失败时间早于最近成功时间 - - 且二者间隔在窗口时间内 + - 失败时间早于最近成功时间(表示这张图是在该次成功之前失败的) + - 且二者间隔在窗口时间内(避免无限期跳过) Args: image: 图片的base64编码或URL diff --git a/utils/image_caption.py b/utils/image_caption.py index ee4a1d5..c70f4ca 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -83,7 +83,7 @@ async def generate_image_caption( skip_window_seconds = ImageCaptionUtils.get_failed_image_skip_window_seconds() if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): - logger.debug(f"跳过失败图片转述(失败记录在最近成功转述之前): {image[:50]}...") + logger.debug(f"跳过失败图片转述(该图片失败记录早于本轮最近一次成功,且时间间隔在窗口内): {image[:50]}...") return None provider_id = image_processing_config.get("image_caption_provider_id", "") From 84bf0bc873f5b61152f621407afd32be0508b818 Mon Sep 17 00:00:00 2001 From: nbnbnd Date: Tue, 19 May 2026 00:28:07 +0800 Subject: [PATCH 09/18] Ensure image is accessible before captioning --- utils/image_caption.py | 56 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/utils/image_caption.py b/utils/image_caption.py index c70f4ca..151b2fd 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -1,6 +1,10 @@ from astrbot.api.all import * from typing import Optional import asyncio +import base64 +import binascii +import urllib.request +import urllib.error from .image_cache import ImageCacheManager class ImageCaptionUtils: @@ -41,6 +45,51 @@ def get_failed_image_skip_window_seconds() -> int: return ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS return skip_window_seconds + @staticmethod + def _check_url_accessible(url: str, timeout: int) -> bool: + """ + 同步检查图片 URL 是否可访问(供异步线程调用) + """ + try: + # 先尝试 HEAD,某些服务器不支持再回退 GET + req = urllib.request.Request(url, method="HEAD") + with urllib.request.urlopen(req, timeout=timeout) as resp: + return 200 <= getattr(resp, "status", 200) < 400 + except Exception: + try: + with urllib.request.urlopen(url, timeout=timeout) as resp: + return 200 <= getattr(resp, "status", 200) < 400 + except Exception: + return False + + @staticmethod + async def _ensure_image_accessible(image: str, timeout: int) -> bool: + """ + 确保图片存在且可获取 + """ + if not image: + return False + + if image.startswith("http://") or image.startswith("https://"): + return await asyncio.to_thread(ImageCaptionUtils._check_url_accessible, image, timeout) + + if image.startswith("data:"): + try: + header, b64data = image.split(",", 1) + if "base64" not in header: + return False + base64.b64decode(b64data, validate=True) + return True + except (ValueError, binascii.Error): + return False + + # 普通 base64 字符串 + try: + base64.b64decode(image, validate=True) + return True + except (binascii.Error, ValueError): + return False + @staticmethod async def generate_image_caption( image: str, # 图片的base64编码或URL @@ -86,6 +135,13 @@ async def generate_image_caption( logger.debug(f"跳过失败图片转述(该图片失败记录早于本轮最近一次成功,且时间间隔在窗口内): {image[:50]}...") return None + # 在调用大模型前确认图片可获取 + image_accessible = await ImageCaptionUtils._ensure_image_accessible(image, timeout=min(timeout, 10)) + if not image_accessible: + logger.warning(f"图片无法获取或不存在,已跳过转述: {image[:50]}...") + ImageCacheManager.set_failed(image) + return None + provider_id = image_processing_config.get("image_caption_provider_id", "") # 获取提供商,支持 UMO 路由 if provider_id == "": From 28c95172a6a4e07d26f52ddca4d36008d5279df0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:34:23 +0000 Subject: [PATCH 10/18] Ensure image accessible before captioning Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 55 +++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 151b2fd..d78441d 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -3,6 +3,8 @@ import asyncio import base64 import binascii +import os +import urllib.parse import urllib.request import urllib.error from .image_cache import ImageCacheManager @@ -51,16 +53,32 @@ def _check_url_accessible(url: str, timeout: int) -> bool: 同步检查图片 URL 是否可访问(供异步线程调用) """ try: - # 先尝试 HEAD,某些服务器不支持再回退 GET - req = urllib.request.Request(url, method="HEAD") + req = urllib.request.Request(url, method="GET") with urllib.request.urlopen(req, timeout=timeout) as resp: - return 200 <= getattr(resp, "status", 200) < 400 + status = getattr(resp, "status", 200) + if not (200 <= status < 400): + return False + try: + return bool(resp.read(1)) + except Exception: + return False except Exception: - try: - with urllib.request.urlopen(url, timeout=timeout) as resp: - return 200 <= getattr(resp, "status", 200) < 400 - except Exception: + return False + + @staticmethod + def _check_local_image_accessible(image_path: str) -> bool: + """ + 同步检查本地图片是否存在且可读取(供异步线程调用) + """ + try: + if not image_path: return False + if not os.path.exists(image_path) or not os.path.isfile(image_path): + return False + with open(image_path, "rb") as f: + return bool(f.read(1)) + except Exception: + return False @staticmethod async def _ensure_image_accessible(image: str, timeout: int) -> bool: @@ -73,20 +91,35 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: if image.startswith("http://") or image.startswith("https://"): return await asyncio.to_thread(ImageCaptionUtils._check_url_accessible, image, timeout) + if image.startswith("file://"): + try: + parsed = urllib.parse.urlparse(image) + image_path = urllib.parse.unquote(parsed.path or "") + if parsed.netloc: + image_path = f"{parsed.netloc}{image_path}" + if not image_path: + return False + return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image_path) + except Exception: + return False + + if os.path.isabs(image) or image.startswith("."): + return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image) + if image.startswith("data:"): try: header, b64data = image.split(",", 1) if "base64" not in header: return False - base64.b64decode(b64data, validate=True) - return True + decoded = base64.b64decode(b64data, validate=True) + return bool(decoded) except (ValueError, binascii.Error): return False # 普通 base64 字符串 try: - base64.b64decode(image, validate=True) - return True + decoded = base64.b64decode(image, validate=True) + return bool(decoded) except (binascii.Error, ValueError): return False From a7e0d83328dcf9160de534e85b08b1706ccf8590 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:35:35 +0000 Subject: [PATCH 11/18] Refine image access checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index d78441d..7e4fb1d 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -53,7 +53,7 @@ def _check_url_accessible(url: str, timeout: int) -> bool: 同步检查图片 URL 是否可访问(供异步线程调用) """ try: - req = urllib.request.Request(url, method="GET") + req = urllib.request.Request(url, method="GET", headers={"Range": "bytes=0-0"}) with urllib.request.urlopen(req, timeout=timeout) as resp: status = getattr(resp, "status", 200) if not (200 <= status < 400): @@ -94,16 +94,20 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: if image.startswith("file://"): try: parsed = urllib.parse.urlparse(image) - image_path = urllib.parse.unquote(parsed.path or "") - if parsed.netloc: - image_path = f"{parsed.netloc}{image_path}" + if parsed.netloc and parsed.netloc not in ("", "localhost"): + if os.name == "nt": + image_path = f"\\\\{parsed.netloc}{urllib.request.url2pathname(parsed.path or '')}" + else: + return False + else: + image_path = urllib.request.url2pathname(parsed.path or "") if not image_path: return False return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image_path) except Exception: return False - if os.path.isabs(image) or image.startswith("."): + if os.path.isabs(image) or image.startswith(("./", "../", ".\\", "..\\")): return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image) if image.startswith("data:"): From f066b72eb18c5e4210eec29e9e9a58bee0dd3457 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:36:36 +0000 Subject: [PATCH 12/18] Harden image accessibility checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 7e4fb1d..f34c427 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -54,6 +54,21 @@ def _check_url_accessible(url: str, timeout: int) -> bool: """ try: req = urllib.request.Request(url, method="GET", headers={"Range": "bytes=0-0"}) + with urllib.request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 200) + if 200 <= status < 400: + try: + return bool(resp.read(1)) + except Exception: + return False + except urllib.error.HTTPError as e: + if e.code not in (400, 405, 416): + return False + except Exception: + return False + + try: + req = urllib.request.Request(url, method="GET") with urllib.request.urlopen(req, timeout=timeout) as resp: status = getattr(resp, "status", 200) if not (200 <= status < 400): @@ -98,6 +113,7 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: if os.name == "nt": image_path = f"\\\\{parsed.netloc}{urllib.request.url2pathname(parsed.path or '')}" else: + logger.warning(f"不支持的 file:// 网络路径: {image}") return False else: image_path = urllib.request.url2pathname(parsed.path or "") @@ -107,8 +123,9 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: except Exception: return False - if os.path.isabs(image) or image.startswith(("./", "../", ".\\", "..\\")): - return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image) + expanded_path = os.path.expanduser(image) + if os.path.exists(expanded_path): + return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) if image.startswith("data:"): try: From e7d0dd15268db50ef2a06d975a028360bdbc6602 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:38:02 +0000 Subject: [PATCH 13/18] Refine URL and path checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 48 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index f34c427..9282eb6 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -52,17 +52,47 @@ def _check_url_accessible(url: str, timeout: int) -> bool: """ 同步检查图片 URL 是否可访问(供异步线程调用) """ + head_fallback_statuses = { + 400, # Bad Request(部分代理/服务不支持 HEAD) + 405, # Method Not Allowed + 501, # Not Implemented + } + range_fallback_statuses = { + 400, # Bad Request(部分服务不支持 Range) + 416, # Range Not Satisfiable + } try: - req = urllib.request.Request(url, method="GET", headers={"Range": "bytes=0-0"}) + req = urllib.request.Request(url, method="HEAD") with urllib.request.urlopen(req, timeout=timeout) as resp: status = getattr(resp, "status", 200) - if 200 <= status < 400: + if not (200 <= status < 400): + return False + content_length = resp.headers.get("Content-Length") + if content_length is not None: try: - return bool(resp.read(1)) - except Exception: - return False + if int(content_length) <= 0: + return False + except (TypeError, ValueError): + pass + return True + except urllib.error.HTTPError as e: + if e.code not in head_fallback_statuses: + return False + except Exception: + return False + + try: + req = urllib.request.Request(url, method="GET", headers={"Range": "bytes=0-0"}) + with urllib.request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 200) + if not (200 <= status < 400): + return False + try: + return bool(resp.read(1)) + except Exception: + return False except urllib.error.HTTPError as e: - if e.code not in (400, 405, 416): + if e.code not in range_fallback_statuses: return False except Exception: return False @@ -99,6 +129,8 @@ def _check_local_image_accessible(image_path: str) -> bool: async def _ensure_image_accessible(image: str, timeout: int) -> bool: """ 确保图片存在且可获取 + + 注意:file:// 的网络路径仅在 Windows 下支持,其他平台会直接拒绝。 """ if not image: return False @@ -126,6 +158,10 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: expanded_path = os.path.expanduser(image) if os.path.exists(expanded_path): return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) + if expanded_path != image: + return False + if (os.path.sep in image or (os.path.altsep and os.path.altsep in image)) and "." in image: + return False if image.startswith("data:"): try: From f3bcd35429399f04516ac0f7da13484d21e2385a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:38:51 +0000 Subject: [PATCH 14/18] Tighten URL and path handling Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 9282eb6..9f24d7f 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -4,6 +4,7 @@ import base64 import binascii import os +import re import urllib.parse import urllib.request import urllib.error @@ -75,6 +76,7 @@ def _check_url_accessible(url: str, timeout: int) -> bool: except (TypeError, ValueError): pass return True + return True except urllib.error.HTTPError as e: if e.code not in head_fallback_statuses: return False @@ -143,6 +145,9 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: parsed = urllib.parse.urlparse(image) if parsed.netloc and parsed.netloc not in ("", "localhost"): if os.name == "nt": + if not re.fullmatch(r"[A-Za-z0-9._-]+", parsed.netloc): + logger.warning(f"不安全的 file:// 网络地址: {image}") + return False image_path = f"\\\\{parsed.netloc}{urllib.request.url2pathname(parsed.path or '')}" else: logger.warning(f"不支持的 file:// 网络路径: {image}") @@ -160,7 +165,9 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) if expanded_path != image: return False - if (os.path.sep in image or (os.path.altsep and os.path.altsep in image)) and "." in image: + path_ext = os.path.splitext(image)[1] + # 若包含路径分隔符且带扩展名,则按路径处理,避免误判为 base64 + if (os.path.sep in image or (os.path.altsep and os.path.altsep in image)) and path_ext: return False if image.startswith("data:"): From fb9a54f1f2dea253dac930b3b84934644598b87b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:39:53 +0000 Subject: [PATCH 15/18] Validate file netloc and base64 checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 9f24d7f..019cd73 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -75,7 +75,6 @@ def _check_url_accessible(url: str, timeout: int) -> bool: return False except (TypeError, ValueError): pass - return True return True except urllib.error.HTTPError as e: if e.code not in head_fallback_statuses: @@ -127,6 +126,23 @@ def _check_local_image_accessible(image_path: str) -> bool: except Exception: return False + @staticmethod + def _is_safe_file_netloc(netloc: str) -> bool: + """ + 校验 file:// 的 netloc 是否安全(仅允许主机名格式) + """ + if not netloc or len(netloc) > 253: + return False + labels = netloc.split(".") + for label in labels: + if not label or len(label) > 63: + return False + if label[0] == "-" or label[-1] == "-": + return False + if not re.fullmatch(r"[A-Za-z0-9_-]+", label): + return False + return True + @staticmethod async def _ensure_image_accessible(image: str, timeout: int) -> bool: """ @@ -145,7 +161,7 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: parsed = urllib.parse.urlparse(image) if parsed.netloc and parsed.netloc not in ("", "localhost"): if os.name == "nt": - if not re.fullmatch(r"[A-Za-z0-9._-]+", parsed.netloc): + if not ImageCaptionUtils._is_safe_file_netloc(parsed.netloc): logger.warning(f"不安全的 file:// 网络地址: {image}") return False image_path = f"\\\\{parsed.netloc}{urllib.request.url2pathname(parsed.path or '')}" @@ -175,15 +191,15 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: header, b64data = image.split(",", 1) if "base64" not in header: return False - decoded = base64.b64decode(b64data, validate=True) - return bool(decoded) + base64.b64decode(b64data, validate=True) + return True except (ValueError, binascii.Error): return False # 普通 base64 字符串 try: - decoded = base64.b64decode(image, validate=True) - return bool(decoded) + base64.b64decode(image, validate=True) + return True except (binascii.Error, ValueError): return False From 432b96995b4b80bf3d89a4e8604ed28dd0384296 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:40:42 +0000 Subject: [PATCH 16/18] Harden file URI validation Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 019cd73..816a9a3 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -21,6 +21,7 @@ class ImageCaptionUtils: context: Optional[Context] = None config: Optional[AstrBotConfig] = None DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS = 300 + SAFE_NETLOC_LABEL_RE = re.compile(r"[A-Za-z0-9_-]+") @staticmethod def init(context: Context, config: AstrBotConfig): @@ -137,12 +138,24 @@ def _is_safe_file_netloc(netloc: str) -> bool: for label in labels: if not label or len(label) > 63: return False - if label[0] == "-" or label[-1] == "-": + if label.startswith("-") or label.endswith("-"): return False - if not re.fullmatch(r"[A-Za-z0-9_-]+", label): + if not ImageCaptionUtils.SAFE_NETLOC_LABEL_RE.fullmatch(label): return False return True + @staticmethod + def _is_safe_unc_path(path: str) -> bool: + """ + 校验 UNC 路径是否包含可疑的路径穿越片段 + """ + if not path: + return False + if ":" in path: + return False + normalized = path.replace("\\", "/") + return ".." not in normalized.split("/") + @staticmethod async def _ensure_image_accessible(image: str, timeout: int) -> bool: """ @@ -164,7 +177,11 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: if not ImageCaptionUtils._is_safe_file_netloc(parsed.netloc): logger.warning(f"不安全的 file:// 网络地址: {image}") return False - image_path = f"\\\\{parsed.netloc}{urllib.request.url2pathname(parsed.path or '')}" + unc_path = urllib.request.url2pathname(parsed.path or "") + if not ImageCaptionUtils._is_safe_unc_path(unc_path): + logger.warning(f"不安全的 file:// UNC 路径: {image}") + return False + image_path = f"\\\\{parsed.netloc}{unc_path}" else: logger.warning(f"不支持的 file:// 网络路径: {image}") return False @@ -180,6 +197,7 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: if os.path.exists(expanded_path): return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) if expanded_path != image: + # 展开后的路径不同,视为用户路径而非 base64 return False path_ext = os.path.splitext(image)[1] # 若包含路径分隔符且带扩展名,则按路径处理,避免误判为 base64 From 6a35133d5d5aec6d1c3bd4c1c6b67574ad6647e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 May 2026 16:41:29 +0000 Subject: [PATCH 17/18] Simplify URL and path checks Agent-Logs-Url: https://github.com/nbnbnd/astrbot_plugin_SpectreCore/sessions/69569bc0-7274-4da6-8c67-e5d1d9afc192 Co-authored-by: nbnbnd <191479905+nbnbnd@users.noreply.github.com> --- utils/image_caption.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/utils/image_caption.py b/utils/image_caption.py index 816a9a3..c1bdeb4 100644 --- a/utils/image_caption.py +++ b/utils/image_caption.py @@ -89,10 +89,7 @@ def _check_url_accessible(url: str, timeout: int) -> bool: status = getattr(resp, "status", 200) if not (200 <= status < 400): return False - try: - return bool(resp.read(1)) - except Exception: - return False + return True except urllib.error.HTTPError as e: if e.code not in range_fallback_statuses: return False @@ -105,10 +102,7 @@ def _check_url_accessible(url: str, timeout: int) -> bool: status = getattr(resp, "status", 200) if not (200 <= status < 400): return False - try: - return bool(resp.read(1)) - except Exception: - return False + return True except Exception: return False @@ -196,13 +190,9 @@ async def _ensure_image_accessible(image: str, timeout: int) -> bool: expanded_path = os.path.expanduser(image) if os.path.exists(expanded_path): return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) - if expanded_path != image: + if image.startswith("~"): # 展开后的路径不同,视为用户路径而非 base64 return False - path_ext = os.path.splitext(image)[1] - # 若包含路径分隔符且带扩展名,则按路径处理,避免误判为 base64 - if (os.path.sep in image or (os.path.altsep and os.path.altsep in image)) and path_ext: - return False if image.startswith("data:"): try: From 378e38d42121ae8ffc34da124504c5947089e743 Mon Sep 17 00:00:00 2001 From: nbnbnd Date: Tue, 19 May 2026 02:02:39 +0800 Subject: [PATCH 18/18] Update author and repository information in metadata --- metadata.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata.yaml b/metadata.yaml index 78d60d7..99a6ac6 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -2,6 +2,6 @@ name: spectrecore # 这是你的插件的唯一识别名。 desc: 使大模型更好的主动回复群聊中的消息,带来生动和沉浸的群聊对话体验 # 插件简短描述 help: 自动检测群聊消息并让AI模型进行回复,让群聊更加生动有趣。 # 插件的帮助信息 version: v2.1.11 # 插件版本号。格式:v1.1.1 或者 v1.1 -author: 23q3 # 作者 -repo: https://github.com/23q3/astrbot_plugin_SpectreCore # 插件的仓库地址 +author: 23q3-nbnbnd # 作者 +repo: https://github.com/nbnbnd/astrbot_plugin_SpectreCore # 插件的仓库地址 display_name: 🌟 SpectreCore