diff --git a/_conf_schema.json b/_conf_schema.json index 3a1557a..e9fc26c 100644 --- a/_conf_schema.json +++ b/_conf_schema.json @@ -2,7 +2,7 @@ "group_msg_history": { "description": "输入给大模型的消息数量", "type": "int", - "hint": "决定了会输入给大模型多少条q群历史消息(最多200条)", + "hint": "决定了会输入给大模型多少条q群历史消息", "default": 20 }, "enable_all_groups": { @@ -93,7 +93,7 @@ "image_count":{ "description":"直接输入给大模型的图片数量", "type":"int", - "hint":"决定了会直接输入给大模型多少张图片,仅限支持图片输入的多模态模型可用(和下面的图像转述功能可以同时开启,最近的指定数量图片会直接输入)", + "hint":"决定了会直接输入给大模型多少张图片,仅限支持图片输入的多模态模型可用", "default":0 }, "use_image_caption": { @@ -128,4 +128,4 @@ } } } -} \ No newline at end of file +} diff --git a/main.py b/main.py index 7a4bd43..3fe3ad0 100644 --- a/main.py +++ b/main.py @@ -127,7 +127,8 @@ async def help(self, event: AstrMessageEvent): " 你也可以重置指定群聊天记录 如/sc reset 群号\n" "使用history指令可以查看最近聊天记录 如/sc history\n" "使用mute/闭嘴指令临时禁用自动回复 如/sc mute 5 或 /sc 闭嘴 10\n" - "使用unmute/说话指令解除禁用 如/sc unmute 或 /sc 说话" + "使用unmute/说话指令解除禁用 如/sc unmute 或 /sc 说话\n" + "使用clear_image_cache/清除图片缓存指令清除图片转述缓存 如/sc clear_image_cache" ) platform_name = event.get_platform_name() if platform_name in ("qq_official", "qq_official_webhook"): @@ -286,4 +287,19 @@ async def callllm(self, event: AstrMessageEvent): yield await LLMUtils.call_llm(event, self.config, self.context) except Exception as e: logger.error(f"调用大模型时发生错误: {e}") - yield event.plain_result(f"触发大模型回复失败喵:{str(e)}") \ No newline at end of file + yield event.plain_result(f"触发大模型回复失败喵:{str(e)}") + + @filter.permission_type(filter.PermissionType.ADMIN) + @spectrecore.command("clear_image_cache", alias=['清除图片缓存']) + async def clear_image_cache(self, event: AstrMessageEvent): + """清除图片转述缓存喵""" + try: + from .utils import ImageCacheManager + success = ImageCacheManager.clear() + if success: + yield event.plain_result("已成功清除图片转述缓存喵~") + else: + yield event.plain_result("清除图片转述缓存失败喵,可能发生错误") + except Exception as e: + logger.error(f"清除图片缓存时发生错误: {e}") + yield event.plain_result(f"清除图片缓存失败喵:{str(e)}") \ No 
newline at end of file diff --git a/metadata.yaml b/metadata.yaml index 78d60d7..99a6ac6 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -2,6 +2,6 @@ name: spectrecore # 这是你的插件的唯一识别名。 desc: 使大模型更好的主动回复群聊中的消息,带来生动和沉浸的群聊对话体验 # 插件简短描述 help: 自动检测群聊消息并让AI模型进行回复,让群聊更加生动有趣。 # 插件的帮助信息 version: v2.1.11 # 插件版本号。格式:v1.1.1 或者 v1.1 -author: 23q3 # 作者 -repo: https://github.com/23q3/astrbot_plugin_SpectreCore # 插件的仓库地址 +author: 23q3-nbnbnd # 作者 +repo: https://github.com/nbnbnd/astrbot_plugin_SpectreCore # 插件的仓库地址 display_name: 🌟 SpectreCore diff --git a/utils/__init__.py b/utils/__init__.py index 5609632..460c44b 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -5,6 +5,7 @@ from .history_storage import HistoryStorage from .message_utils import MessageUtils from .image_caption import ImageCaptionUtils +from .image_cache import ImageCacheManager from .llm_utils import LLMUtils from .persona_utils import PersonaUtils from .text_filter import TextFilter @@ -14,6 +15,7 @@ "HistoryStorage", "MessageUtils", "ImageCaptionUtils", + "ImageCacheManager", "LLMUtils", "PersonaUtils", "TextFilter", diff --git a/utils/history_storage.py b/utils/history_storage.py index 173f6bb..8e6a231 100644 --- a/utils/history_storage.py +++ b/utils/history_storage.py @@ -113,10 +113,6 @@ async def save_message(message: AstrBotMessage, platform_name: str, chat_id_over sanitized_message = HistoryStorage._sanitize_message(message) history.append(sanitized_message) - # 限制历史记录数量 - if len(history) > 200: - history = history[-200:] - # 确保父目录存在 os.makedirs(os.path.dirname(file_path), exist_ok=True) @@ -463,4 +459,3 @@ def _cleanup_old_images() -> None: except Exception as e: logger.error(f"清理图片文件时发生错误: {e}") - diff --git a/utils/image_cache.py b/utils/image_cache.py new file mode 100644 index 0000000..21a9558 --- /dev/null +++ b/utils/image_cache.py @@ -0,0 +1,448 @@ +import os +import json +import time +import hashlib +import traceback +from typing import Optional, Dict +from astrbot.api.all import * + + 
class ImageCacheManager:
    """
    Persistent cache of image captions.

    Captions are keyed by the SHA-256 hex digest of the image string (a base64
    payload or a URL) and held in a class-level dict that is flushed to a JSON
    file every ``WRITE_THRESHOLD`` writes. A second class-level dict records
    the time of the most recent captioning failure per image. Every method is
    static; all state lives on the class itself.
    """

    # Upper bound accepted for the configured retention period, in days.
    MAX_RETENTION_DAYS = 365
    # Retention period used when the configured value is missing or invalid.
    DEFAULT_RETENTION_DAYS = 7
    HOURS_PER_DAY = 24
    SECONDS_PER_HOUR = 3600
    # Number of recorded writes after which the cache is flushed to disk.
    WRITE_THRESHOLD = 10

    # Plugin configuration object, stored by init().
    config: Optional["AstrBotConfig"] = None
    # Directory that holds the cache file.
    base_storage_path: Optional[str] = None
    # image hash -> (caption, time the caption was stored)
    memory_cache: Dict[str, tuple[str, float]] = {}
    # image hash -> time of the most recent captioning failure
    failure_cache: Dict[str, float] = {}
    # Writes recorded since the last flush to disk.
    write_count: int = 0

    @staticmethod
    def init(config: "AstrBotConfig"):
        """
        Store the config, reset in-memory state and reload the cache from disk.

        Args:
            config: AstrBotConfig object used later by cleanup_old_entries().
        """
        ImageCacheManager.config = config
        ImageCacheManager.write_count = 0
        # Drop whatever is in memory so the state reflects the file on disk.
        ImageCacheManager.memory_cache.clear()
        ImageCacheManager.failure_cache.clear()

        ImageCacheManager.base_storage_path = ImageCacheManager._resolve_storage_dir()
        ImageCacheManager._ensure_dir(ImageCacheManager.base_storage_path)
        logger.info(f"图片缓存存储路径初始化: {ImageCacheManager.base_storage_path}")

        ImageCacheManager._load_cache_from_disk()

    @staticmethod
    def _resolve_storage_dir() -> str:
        """Return the cache directory located under the AstrBot data path."""
        from astrbot.core.utils.astrbot_path import get_astrbot_data_path
        return os.path.join(get_astrbot_data_path(), "data", "image_caption_cache")

    @staticmethod
    def _ensure_dir(directory: str) -> None:
        """Create ``directory`` (and parents) when the path does not exist yet."""
        if not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)

    @staticmethod
    def _get_cache_file_path() -> str:
        """Return the path of the JSON cache file, resolving the directory lazily."""
        if not ImageCacheManager.base_storage_path:
            # init() has not run (or left the path empty): resolve it here.
            ImageCacheManager.base_storage_path = ImageCacheManager._resolve_storage_dir()
            ImageCacheManager._ensure_dir(ImageCacheManager.base_storage_path)

        return os.path.join(ImageCacheManager.base_storage_path, "caption_cache.json")

    @staticmethod
    def _generate_image_hash(image: str) -> str:
        """
        Return the SHA-256 hex digest of ``image``, used as the cache key.

        Hashing keeps keys at a fixed length regardless of how long the
        base64 payload or URL is.

        Args:
            image: base64-encoded image data or an image URL.

        Returns:
            64-character hexadecimal digest.
        """
        return hashlib.sha256(image.encode('utf-8')).hexdigest()

    @staticmethod
    def _record_write() -> None:
        """Count one cache mutation and flush to disk once the threshold is hit."""
        ImageCacheManager.write_count += 1
        if ImageCacheManager.write_count >= ImageCacheManager.WRITE_THRESHOLD:
            ImageCacheManager._save_cache_to_disk()
            ImageCacheManager.write_count = 0

    @staticmethod
    def _load_cache_from_disk() -> None:
        """
        Load caption and failure entries from the cache file into memory.

        Two on-disk layouts are accepted:
          1. legacy: ``{hash: [caption, timestamp]}``
          2. current: ``{"captions": {...}, "failures": {...}}``
        Malformed entries are skipped with a warning; any error while reading
        the file is logged and leaves the in-memory state as it was.
        """
        try:
            cache_file = ImageCacheManager._get_cache_file_path()

            if not os.path.exists(cache_file):
                logger.debug("缓存文件不存在,跳过加载")
                return

            with open(cache_file, "r", encoding="utf-8") as f:
                cache_data = json.load(f)

            # Assume the legacy layout unless one of the section keys is present.
            caption_data = cache_data
            failure_data = {}
            if isinstance(cache_data, dict) and ("captions" in cache_data or "failures" in cache_data):
                caption_data = cache_data.get("captions", {})
                failure_data = cache_data.get("failures", {})

            if isinstance(caption_data, dict):
                for key, value in caption_data.items():
                    # Each entry must be exactly [caption, timestamp].
                    if not (isinstance(value, (list, tuple)) and len(value) == 2):
                        logger.warning(f"缓存条目格式不正确,跳过: {key}")
                        continue
                    caption, timestamp = value[0], value[1]
                    if isinstance(caption, str) and isinstance(timestamp, (int, float)):
                        ImageCacheManager.memory_cache[key] = (caption, timestamp)
                    else:
                        logger.warning(f"缓存条目类型不正确,跳过: {key}")

                logger.info(f"成功从磁盘加载 {len(ImageCacheManager.memory_cache)} 条图片缓存")
            else:
                logger.warning(f"图片缓存数据格式不正确,期望 dict,实际为 {type(caption_data).__name__},跳过加载")

            if isinstance(failure_data, dict):
                for key, value in failure_data.items():
                    if isinstance(value, (int, float)):
                        ImageCacheManager.failure_cache[key] = float(value)
                    else:
                        logger.warning(f"失败缓存条目格式不正确,跳过: {key}")
            elif failure_data:
                logger.warning("失败缓存数据格式不正确,跳过加载")

        except Exception as e:
            logger.error(f"从磁盘加载缓存失败: {e}")
            logger.debug(traceback.format_exc())

    @staticmethod
    def _save_cache_to_disk() -> None:
        """
        Write the in-memory caches to the cache file as JSON.

        The data is written to a sibling temporary file first and then moved
        over the real file with ``os.replace``, so an interrupted write cannot
        leave a truncated cache file behind. Errors are logged, not raised.
        """
        try:
            cache_file = ImageCacheManager._get_cache_file_path()
            os.makedirs(os.path.dirname(cache_file), exist_ok=True)

            # Tuples are stored as 2-element lists; invalid entries are dropped.
            serializable_cache = {}
            skipped_count = 0
            for key, value in ImageCacheManager.memory_cache.items():
                is_valid = (
                    isinstance(value, tuple)
                    and len(value) == 2
                    and isinstance(value[0], str)
                    and isinstance(value[1], (int, float))
                )
                if is_valid:
                    serializable_cache[key] = [value[0], value[1]]
                else:
                    skipped_count += 1
                    logger.debug(f"跳过格式不正确的缓存条目: {key}")

            serializable_failures = {}
            for key, value in ImageCacheManager.failure_cache.items():
                if isinstance(value, (int, float)):
                    serializable_failures[key] = float(value)
                else:
                    logger.debug(f"跳过格式不正确的失败缓存条目: {key}")

            tmp_file = f"{cache_file}.tmp"
            try:
                with open(tmp_file, "w", encoding="utf-8") as f:
                    json.dump(
                        {
                            "captions": serializable_cache,
                            "failures": serializable_failures,
                        },
                        f,
                        ensure_ascii=False,
                        indent=2,
                    )
                # Swap the finished file in; readers never see a partial write.
                os.replace(tmp_file, cache_file)
            except Exception:
                # Do not leave a half-written temporary file lying around.
                if os.path.exists(tmp_file):
                    os.remove(tmp_file)
                raise

            if skipped_count > 0:
                logger.debug(f"成功保存 {len(serializable_cache)} 条有效缓存到磁盘,跳过 {skipped_count} 条格式不正确的条目")
            else:
                logger.debug(f"成功保存 {len(serializable_cache)} 条图片缓存到磁盘")

        except Exception as e:
            logger.error(f"保存缓存到磁盘失败: {e}")
            logger.debug(traceback.format_exc())

    @staticmethod
    def get(image: str) -> Optional[str]:
        """
        Look up the cached caption for an image.

        Args:
            image: base64-encoded image data or an image URL.

        Returns:
            The cached caption, or None when there is no entry, the entry is
            malformed, or the lookup raised.
        """
        try:
            image_hash = ImageCacheManager._generate_image_hash(image)
            if image_hash not in ImageCacheManager.memory_cache:
                return None

            cached_data = ImageCacheManager.memory_cache[image_hash]

            # Entries must be a (caption, timestamp) pair with the right types.
            if not (isinstance(cached_data, (tuple, list)) and len(cached_data) == 2):
                logger.warning(f"缓存条目格式不正确,期望恰好2个元素的tuple/list但获得 {type(cached_data).__name__}")
                return None

            caption, timestamp = cached_data[0], cached_data[1]
            if not isinstance(caption, str):
                logger.warning(f"缓存条目格式不正确,期望字符串但获得 {type(caption).__name__}")
                return None
            if not isinstance(timestamp, (int, float)):
                logger.warning(f"缓存条目时间戳格式不正确,期望数字但获得 {type(timestamp).__name__}")
                return None

            return caption

        except Exception as e:
            logger.error(f"获取缓存失败: {e}")
            return None

    @staticmethod
    def set(image: str, caption: str) -> bool:
        """
        Store a caption for an image, stamped with the current time.

        Args:
            image: base64-encoded image data or an image URL.
            caption: caption text to cache.

        Returns:
            True on success, False when storing raised.
        """
        try:
            image_hash = ImageCacheManager._generate_image_hash(image)

            # The timestamp is what cleanup_old_entries() later compares against.
            ImageCacheManager.memory_cache[image_hash] = (caption, time.time())
            ImageCacheManager._record_write()

            logger.debug(f"缓存图片描述: {image[:50]}...")
            return True

        except Exception as e:
            logger.error(f"存储缓存失败: {e}")
            return False

    @staticmethod
    def clear() -> bool:
        """
        Drop every caption and failure entry and delete the cache file.

        Returns:
            True on success, False when clearing raised.
        """
        try:
            ImageCacheManager.memory_cache.clear()
            ImageCacheManager.failure_cache.clear()
            ImageCacheManager.write_count = 0

            cache_file = ImageCacheManager._get_cache_file_path()
            if os.path.exists(cache_file):
                os.remove(cache_file)

            logger.info("已清空所有图片缓存")
            return True

        except Exception as e:
            logger.error(f"清空缓存失败: {e}")
            return False

    @staticmethod
    def cleanup_old_entries() -> None:
        """
        Remove caption and failure entries older than the configured retention.

        The retention period is read from
        ``config["image_processing"]["image_retention_days"]``; values that are
        not numbers or fall outside ``1..MAX_RETENTION_DAYS`` are replaced by
        ``DEFAULT_RETENTION_DAYS``. Entries without a numeric timestamp are
        treated as corrupt and removed too. The cache is saved when anything
        was removed. Does nothing when init() has not stored a config.
        """
        try:
            if not ImageCacheManager.config:
                logger.debug("配置未初始化,跳过缓存清理")
                return

            image_processing_config = ImageCacheManager.config.get("image_processing", {})
            retention_days = image_processing_config.get("image_retention_days", ImageCacheManager.DEFAULT_RETENTION_DAYS)

            # A non-numeric value must fall back to the default instead of
            # raising inside the comparison and aborting the whole cleanup.
            retention_valid = (
                isinstance(retention_days, (int, float))
                and 1 <= retention_days <= ImageCacheManager.MAX_RETENTION_DAYS
            )
            if not retention_valid:
                logger.warning(f"图片保留天数配置无效: {retention_days},使用默认值{ImageCacheManager.DEFAULT_RETENTION_DAYS}天")
                retention_days = ImageCacheManager.DEFAULT_RETENTION_DAYS

            current_time = time.time()
            cleanup_threshold = retention_days * ImageCacheManager.HOURS_PER_DAY * ImageCacheManager.SECONDS_PER_HOUR

            # Collect keys first; the dicts must not change while iterated.
            keys_to_remove = []
            for key, value in ImageCacheManager.memory_cache.items():
                has_timestamp = (
                    isinstance(value, (tuple, list))
                    and len(value) == 2
                    and isinstance(value[1], (int, float))
                )
                if not has_timestamp:
                    keys_to_remove.append(key)
                    logger.debug(f"删除时间戳无效的缓存条目: {key}")
                elif current_time - value[1] > cleanup_threshold:
                    keys_to_remove.append(key)

            failure_keys_to_remove = [
                key
                for key, timestamp in ImageCacheManager.failure_cache.items()
                if not isinstance(timestamp, (int, float))
                or current_time - timestamp > cleanup_threshold
            ]

            for key in keys_to_remove:
                del ImageCacheManager.memory_cache[key]
            for key in failure_keys_to_remove:
                del ImageCacheManager.failure_cache[key]

            removed_count = len(keys_to_remove) + len(failure_keys_to_remove)
            if removed_count > 0:
                logger.info(f"清理过期缓存完成,清理了 {removed_count} 条超过 {retention_days} 天的缓存条目")
                ImageCacheManager._save_cache_to_disk()

        except Exception as e:
            logger.error(f"清理缓存时发生错误: {e}")

    @staticmethod
    def force_save() -> None:
        """Flush the caches to disk immediately, regardless of the write count."""
        try:
            ImageCacheManager._save_cache_to_disk()
        except Exception as e:
            logger.error(f"强制保存缓存失败: {e}")

    @staticmethod
    def get_failed_timestamp(image: str) -> Optional[float]:
        """
        Return when captioning last failed for ``image``.

        Returns:
            The recorded failure time as a float, or None when there is no
            numeric record or the lookup raised.
        """
        try:
            image_hash = ImageCacheManager._generate_image_hash(image)
            timestamp = ImageCacheManager.failure_cache.get(image_hash)
            if isinstance(timestamp, (int, float)):
                return float(timestamp)
            return None
        except Exception as e:
            logger.error(f"获取失败记录失败: {e}")
            return None

    @staticmethod
    def is_failed(image: str) -> bool:
        """Return True when a failure record exists for ``image``."""
        return ImageCacheManager.get_failed_timestamp(image) is not None

    @staticmethod
    def set_failed(image: str) -> bool:
        """
        Record that captioning ``image`` failed at the current time.

        Returns:
            True on success, False when recording raised.
        """
        try:
            image_hash = ImageCacheManager._generate_image_hash(image)
            ImageCacheManager.failure_cache[image_hash] = time.time()
            ImageCacheManager._record_write()
            return True
        except Exception as e:
            logger.error(f"记录失败缓存失败: {e}")
            return False

    @staticmethod
    def clear_failed(image: str) -> bool:
        """
        Remove the failure record for ``image`` if one exists.

        Only an actual removal counts as a write. Without that guard, calling
        this for images that have no failure record would still advance the
        write counter and force a full rewrite of the cache file every
        ``WRITE_THRESHOLD`` calls.

        Returns:
            True on success (including when nothing had to be removed),
            False when the removal raised.
        """
        try:
            image_hash = ImageCacheManager._generate_image_hash(image)
            if image_hash in ImageCacheManager.failure_cache:
                del ImageCacheManager.failure_cache[image_hash]
                ImageCacheManager._record_write()
            return True
        except Exception as e:
            logger.error(f"清理失败缓存失败: {e}")
            return False

    @staticmethod
    def should_skip_failed_image(image: str, latest_success_timestamp: Optional[float], window_seconds: int) -> bool:
        """
        Decide whether captioning should be skipped for a previously failed image.

        The image is skipped only when all of these hold:
          - a failure record exists for it;
          - the failure happened before the latest successful caption;
          - the gap between the two is at most ``window_seconds``, so a stale
            failure does not suppress the image indefinitely.

        Args:
            image: base64-encoded image data or an image URL.
            latest_success_timestamp: time of the most recent successful
                caption, or None when there has been none.
            window_seconds: maximum gap, in seconds, between the failure and
                the latest success; non-positive values disable skipping.

        Returns:
            True when the image should be skipped.
        """
        if latest_success_timestamp is None or window_seconds <= 0:
            return False

        failed_timestamp = ImageCacheManager.get_failed_timestamp(image)
        if failed_timestamp is None:
            return False

        return failed_timestamp < latest_success_timestamp and (latest_success_timestamp - failed_timestamp) <= window_seconds
* from typing import Optional import asyncio +import base64 +import binascii +import os +import re +import urllib.parse +import urllib.request +import urllib.error +from .image_cache import ImageCacheManager class ImageCaptionUtils: """ @@ -12,20 +20,203 @@ class ImageCaptionUtils: # 保存context和config对象的静态变量 context: Optional[Context] = None config: Optional[AstrBotConfig] = None - # 图片描述缓存 - caption_cache: dict[str, str] = {} + DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS = 300 + SAFE_NETLOC_LABEL_RE = re.compile(r"[A-Za-z0-9_-]+") @staticmethod def init(context: Context, config: AstrBotConfig): """初始化图片转述工具类,保存context和config引用""" ImageCaptionUtils.context = context ImageCaptionUtils.config = config + # 初始化图片缓存管理器 + ImageCacheManager.init(config) + @staticmethod + def get_failed_image_skip_window_seconds() -> int: + """ + 获取失败图片跳过策略的时间窗口(秒) + """ + config = ImageCaptionUtils.config + if not config: + return ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + + image_processing_config = config.get("image_processing", {}) + skip_window_seconds = image_processing_config.get( + "failed_image_skip_window_seconds", + ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + ) + if not isinstance(skip_window_seconds, int) or skip_window_seconds < 0: + return ImageCaptionUtils.DEFAULT_FAILED_IMAGE_SKIP_WINDOW_SECONDS + return skip_window_seconds + + @staticmethod + def _check_url_accessible(url: str, timeout: int) -> bool: + """ + 同步检查图片 URL 是否可访问(供异步线程调用) + """ + head_fallback_statuses = { + 400, # Bad Request(部分代理/服务不支持 HEAD) + 405, # Method Not Allowed + 501, # Not Implemented + } + range_fallback_statuses = { + 400, # Bad Request(部分服务不支持 Range) + 416, # Range Not Satisfiable + } + try: + req = urllib.request.Request(url, method="HEAD") + with urllib.request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 200) + if not (200 <= status < 400): + return False + content_length = resp.headers.get("Content-Length") + if content_length is not 
None: + try: + if int(content_length) <= 0: + return False + except (TypeError, ValueError): + pass + return True + except urllib.error.HTTPError as e: + if e.code not in head_fallback_statuses: + return False + except Exception: + return False + + try: + req = urllib.request.Request(url, method="GET", headers={"Range": "bytes=0-0"}) + with urllib.request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 200) + if not (200 <= status < 400): + return False + return True + except urllib.error.HTTPError as e: + if e.code not in range_fallback_statuses: + return False + except Exception: + return False + + try: + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 200) + if not (200 <= status < 400): + return False + return True + except Exception: + return False + + @staticmethod + def _check_local_image_accessible(image_path: str) -> bool: + """ + 同步检查本地图片是否存在且可读取(供异步线程调用) + """ + try: + if not image_path: + return False + if not os.path.exists(image_path) or not os.path.isfile(image_path): + return False + with open(image_path, "rb") as f: + return bool(f.read(1)) + except Exception: + return False + + @staticmethod + def _is_safe_file_netloc(netloc: str) -> bool: + """ + 校验 file:// 的 netloc 是否安全(仅允许主机名格式) + """ + if not netloc or len(netloc) > 253: + return False + labels = netloc.split(".") + for label in labels: + if not label or len(label) > 63: + return False + if label.startswith("-") or label.endswith("-"): + return False + if not ImageCaptionUtils.SAFE_NETLOC_LABEL_RE.fullmatch(label): + return False + return True + + @staticmethod + def _is_safe_unc_path(path: str) -> bool: + """ + 校验 UNC 路径是否包含可疑的路径穿越片段 + """ + if not path: + return False + if ":" in path: + return False + normalized = path.replace("\\", "/") + return ".." 
not in normalized.split("/") + + @staticmethod + async def _ensure_image_accessible(image: str, timeout: int) -> bool: + """ + 确保图片存在且可获取 + + 注意:file:// 的网络路径仅在 Windows 下支持,其他平台会直接拒绝。 + """ + if not image: + return False + + if image.startswith("http://") or image.startswith("https://"): + return await asyncio.to_thread(ImageCaptionUtils._check_url_accessible, image, timeout) + + if image.startswith("file://"): + try: + parsed = urllib.parse.urlparse(image) + if parsed.netloc and parsed.netloc not in ("", "localhost"): + if os.name == "nt": + if not ImageCaptionUtils._is_safe_file_netloc(parsed.netloc): + logger.warning(f"不安全的 file:// 网络地址: {image}") + return False + unc_path = urllib.request.url2pathname(parsed.path or "") + if not ImageCaptionUtils._is_safe_unc_path(unc_path): + logger.warning(f"不安全的 file:// UNC 路径: {image}") + return False + image_path = f"\\\\{parsed.netloc}{unc_path}" + else: + logger.warning(f"不支持的 file:// 网络路径: {image}") + return False + else: + image_path = urllib.request.url2pathname(parsed.path or "") + if not image_path: + return False + return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, image_path) + except Exception: + return False + + expanded_path = os.path.expanduser(image) + if os.path.exists(expanded_path): + return await asyncio.to_thread(ImageCaptionUtils._check_local_image_accessible, expanded_path) + if image.startswith("~"): + # 展开后的路径不同,视为用户路径而非 base64 + return False + + if image.startswith("data:"): + try: + header, b64data = image.split(",", 1) + if "base64" not in header: + return False + base64.b64decode(b64data, validate=True) + return True + except (ValueError, binascii.Error): + return False + + # 普通 base64 字符串 + try: + base64.b64decode(image, validate=True) + return True + except (binascii.Error, ValueError): + return False + @staticmethod async def generate_image_caption( image: str, # 图片的base64编码或URL umo: Optional[str] = None, # unified_msg_origin,用于 UMO 路由 - timeout: int = 30 + 
timeout: int = 30, + latest_success_timestamp: Optional[float] = None ) -> Optional[str]: """ 为单张图片生成文字描述 @@ -34,14 +225,17 @@ async def generate_image_caption( image: 图片的base64编码或URL umo: unified_msg_origin,用于获取对应 UMO 的 provider timeout: 超时时间(秒) + latest_success_timestamp: 最近一次成功转述时间戳(用于失败图片跳过策略) Returns: 生成的图片描述文本,如果失败则返回None """ - # 检查缓存 - if image in ImageCaptionUtils.caption_cache: + # 检查持久化缓存 + cached_caption = ImageCacheManager.get(image) + if cached_caption is not None: + ImageCacheManager.clear_failed(image) logger.debug(f"命中图片描述缓存: {image[:50]}...") - return ImageCaptionUtils.caption_cache[image] + return cached_caption # 获取配置 config = ImageCaptionUtils.config @@ -56,6 +250,19 @@ async def generate_image_caption( if not image_processing_config.get("use_image_caption", False): return None + skip_window_seconds = ImageCaptionUtils.get_failed_image_skip_window_seconds() + + if ImageCacheManager.should_skip_failed_image(image, latest_success_timestamp, skip_window_seconds): + logger.debug(f"跳过失败图片转述(该图片失败记录早于本轮最近一次成功,且时间间隔在窗口内): {image[:50]}...") + return None + + # 在调用大模型前确认图片可获取 + image_accessible = await ImageCaptionUtils._ensure_image_accessible(image, timeout=min(timeout, 10)) + if not image_accessible: + logger.warning(f"图片无法获取或不存在,已跳过转述: {image[:50]}...") + ImageCacheManager.set_failed(image) + return None + provider_id = image_processing_config.get("image_caption_provider_id", "") # 获取提供商,支持 UMO 路由 if provider_id == "": @@ -83,15 +290,20 @@ async def call_llm(): llm_response = await asyncio.wait_for(call_llm(), timeout=timeout) caption = llm_response.completion_text - # 缓存结果 + # 缓存结果到持久化缓存 if caption: - ImageCaptionUtils.caption_cache[image] = caption - logger.debug(f"缓存图片描述: {image[:50]}... 
-> {caption}") + ImageCacheManager.set(image, caption) + ImageCacheManager.clear_failed(image) + logger.debug(f"缓存到持久化存储: {image[:50]}...") + else: + ImageCacheManager.set_failed(image) return caption except asyncio.TimeoutError: logger.warning(f"图片转述超时,超过了{timeout}秒") + ImageCacheManager.set_failed(image) return None except Exception as e: logger.error(f"图片转述失败: {e}") + ImageCacheManager.set_failed(image) return None diff --git a/utils/message_utils.py b/utils/message_utils.py index d66dc74..b7342b5 100644 --- a/utils/message_utils.py +++ b/utils/message_utils.py @@ -82,6 +82,7 @@ async def outline_message_list(message_list: List[BaseMessageComponent], umo: Op umo: unified_msg_origin,用于 UMO 路由 """ outline = "" + latest_success_timestamp: Optional[float] = None for i in message_list: try: # 获取组件类型 @@ -110,9 +111,14 @@ async def outline_message_list(message_list: List[BaseMessageComponent], umo: Op continue image = image_path - caption = await ImageCaptionUtils.generate_image_caption(image, umo=umo) + caption = await ImageCaptionUtils.generate_image_caption( + image, + umo=umo, + latest_success_timestamp=latest_success_timestamp + ) if caption: outline += f"[图片: {caption}]" + latest_success_timestamp = time.time() else: outline += f"[图片]" else: