diff --git a/LICENSE b/LICENSE
index 5798ff9..22d8e6f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2025 饰乐
+Copyright (c) 2024 Les Freire
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6d97efe
--- /dev/null
+++ b/README.md
@@ -0,0 +1,71 @@
+
+
+
+
+
+# astrbot_plugin_parser
+
+_✨ 链接解析器 ✨_
+
+[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+[![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
+[![AstrBot](https://img.shields.io/badge/AstrBot-plugin-green.svg)](https://github.com/Soulter/AstrBot)
+[![Author](https://img.shields.io/badge/author-Zhalslar-black.svg)](https://github.com/Zhalslar)
+
+
+
+## 📖 介绍
+
+| 平台 | 触发的消息形态 | 视频 | 图集 | 音频 |
+| ------- | --------------------------------- | ---- | ---- | ---- |
+| B 站 | av 号/BV 号/链接/短链/卡片/小程序 | ✅ | ✅ | ✅ |
+| 抖音 | 链接(分享链接,兼容电脑端链接) | ✅ | ✅ | ❌️ |
+| 微博 | 链接(博文,视频,show, 文章) | ✅ | ✅ | ❌️ |
+| 小红书 | 链接(含短链)/卡片 | ✅ | ✅ | ❌️ |
+| 快手 | 链接(包含标准链接和短链) | ✅ | ✅ | ❌️ |
+| acfun | 链接 | ✅ | ❌️ | ❌️ |
+| youtube | 链接(含短链) | ✅ | ❌️ | ✅ |
+| tiktok | 链接 | ✅ | ❌️ | ❌️ |
+| twitter | 链接 | ✅ | ✅ | ❌️ |
+
+## 🎨 效果图
+
+插件默认启用 PIL 实现的通用媒体卡片渲染,效果图如下
+
+
+
+## 💿 安装
+
+直接在astrbot的插件市场搜索astrbot_plugin_parser,点击安装,等待完成即可
+
+## ⚙️ 配置
+
+请在astrbot的插件配置面板查看并修改
+
+## 🎉 使用
+
+| 指令 | 权限 | 说明 |
+| :------: | :-------------------: | :---------------: |
+| 开启解析 | ADMIN | 开启解析 |
+| 关闭解析 | ADMIN | 关闭解析 |
+| bm | - | 下载 B 站音频 |
+| ym | - | 下载 youtube 音频 |
+| blogin | ADMIN | 扫码获取 B 站凭证 |
+
+## 🧩 扩展
+
+插件支持自定义解析器,通过继承 `BaseParser` 类并实现 `platform`, `handle` 即可。
+
+示例解析器请看 [示例解析器](https://github.com/Zhalslar/astrbot_plugin_parser/blob/main/core/parsers/example.py)
+
+## 🎉 致谢
+
+本项目核心代码来自[nonebot-plugin-parser](https://github.com/fllesser/nonebot-plugin-parser),请前往原仓库给作者点个Star!
diff --git a/_conf_schema.json b/_conf_schema.json
new file mode 100644
index 0000000..db148ea
--- /dev/null
+++ b/_conf_schema.json
@@ -0,0 +1,160 @@
+{
+ "disabled_sessions": {
+ "description": "关闭解析的会话",
+ "type": "list",
+ "hint": "在会话中使用命令 “开启解析” 和 “关闭解析” 来设置某会话的解析状态",
+ "default": []
+ },
+ "enable_platforms": {
+ "description": "启用解析的平台",
+ "type": "list",
+ "hint": "",
+ "options": [
+ "A站",
+ "B站",
+ "微博",
+ "小红书",
+ "抖音",
+ "快手",
+ "NGA",
+ "TikTok",
+ "推特",
+ "油管"
+ ],
+ "default": [
+ "A站",
+ "B站",
+ "微博",
+ "小红书",
+ "抖音",
+ "快手",
+ "NGA",
+ "TikTok",
+ "推特",
+ "油管"
+ ]
+ },
+ "forward_contents": {
+ "description": "转发媒体内容",
+ "type": "bool",
+ "hint": "是否将解析到的图片/视频/音频作为合并转发消息发送",
+ "default": true
+ },
+ "upload_audio": {
+ "description": "上传音频文件",
+ "type": "bool",
+ "hint": "是否将解析到的音频文件上传到群文件",
+ "default": false
+ },
+ "max_size": {
+ "description": "资源最大大小",
+ "type": "int",
+ "hint": "允许下载的音视频最大体积,单位 MB",
+ "default": 90
+ },
+ "max_duration": {
+ "description": "资源最大时长",
+ "type": "int",
+ "hint": "允许下载的音视频最大时长,单位秒",
+ "default": 480
+ },
+ "download_timeout": {
+ "description": "下载请求超时时间",
+ "type": "int",
+ "hint": "下载视频、音频等较大文件时的请求超时时间,单位秒。 建议设置大一点,视频、音频较大时下载耗时较长",
+ "default": 280
+ },
+ "common_timeout": {
+ "description": "普通请求超时时间",
+ "type": "int",
+ "hint": "普通请求超时时间,单位秒。用于一些普通的请求 ",
+ "default": 15
+ },
+ "bili_ck": {
+ "description": "Bilibili Cookies",
+ "type": "text",
+ "hint": "用于B站解析的登录Cookies,留空则使用无登录状态",
+ "default": ""
+ },
+ "bili_video_codecs": {
+ "description": "B站视频编码",
+ "type": "string",
+ "hint": "优先下载的编码类型,可选:AVC、AV1、HEV",
+ "options": [
+ "AVC",
+ "AV1",
+ "HEV"
+ ],
+ "default": "AVC"
+ },
+ "bili_video_quality": {
+ "description": "B站视频分辨率",
+ "type": "string",
+ "hint": "下载B站视频的分辨率",
+ "options": [
+ "_360P",
+ "_480P",
+ "_720P",
+ "_1080P",
+ "_1080P_PLUS",
+ "_1080P_60",
+ "_4K",
+ "HDR",
+ "DOLBY",
+ "_8K",
+ "AI_REPAIR"
+ ],
+ "default": "_720P"
+ },
+ "ytb_ck": {
+ "description": "YouTube Cookies",
+ "type": "text",
+ "hint": "用于YouTube解析的登录Cookies,留空则使用无登录状态",
+ "default": ""
+ },
+ "proxy": {
+ "description": "代理地址",
+ "type": "string",
+ "hint": "如 http://127.0.0.1:7890,留空则直连。仅作用于 youtube, tiktok 解析",
+ "default": ""
+ },
+ "emoji_cdn": {
+ "description": "Pilmoji 表情 CDN",
+ "type": "string",
+ "hint": "渲染表情使用的 CDN 地址,一般无需修改",
+ "default": "https://cdn.jsdelivr.net/npm/emoji-datasource-facebook@14.0.0/img/facebook/64/"
+ },
+ "emoji_style": {
+ "description": "Pilmoji 表情样式",
+ "type": "string",
+ "hint": "可选:APPLE、FACEBOOK、GOOGLE、TWITTER",
+ "options": [
+ "APPLE",
+ "FACEBOOK",
+ "GOOGLE",
+ "TWITTER"
+ ],
+ "default": "FACEBOOK"
+ },
+ "clean_cron": {
+ "description": "自动清理缓存的触发周期",
+ "type": "string",
+ "hint": "使用 Cron 表达式(分 时 日 月 周)定义。例如:“30 2 * * *” 表示每天 2:30 。留空表示禁用自动清理",
+ "default": "30 2 * * *"
+ },
+ "data_dir": {
+ "description": "数据目录",
+ "type": "string",
+ "invisible": true
+ },
+ "cache_dir": {
+ "description": "缓存目录",
+ "type": "string",
+ "invisible": true
+ },
+ "ytb_cookies_file": {
+ "description": "YouTube Cookies 文件",
+ "type": "string",
+ "invisible": true
+ }
+}
\ No newline at end of file
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/core/clean.py b/core/clean.py
new file mode 100644
index 0000000..b44627b
--- /dev/null
+++ b/core/clean.py
@@ -0,0 +1,62 @@
+import asyncio
+import zoneinfo
+from pathlib import Path
+
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.cron import CronTrigger
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+from astrbot.core.star.context import Context
+
+from .utils import safe_unlink
+
+
+class CacheCleaner:
+ """
+ 每天固定时间自动清理插件缓存目录的调度器封装。
+ """
+ JOBNAME = "CacheCleaner"
+ def __init__(self, context: Context, config: AstrBotConfig):
+ self.clean_cron = config["clean_cron"]
+ self.cache_dir = Path(config["cache_dir"])
+
+ tz = context.get_config().get("timezone")
+ self.timezone = (
+ zoneinfo.ZoneInfo(tz) if tz else zoneinfo.ZoneInfo("Asia/Shanghai")
+ )
+ self.scheduler = AsyncIOScheduler(timezone=self.timezone)
+ self.scheduler.start()
+
+ self.register_task()
+
+ logger.info(f"{self.JOBNAME} 已启动,任务周期:{self.clean_cron}")
+
+ def register_task(self):
+ try:
+ self.trigger = CronTrigger.from_crontab(self.clean_cron)
+ self.scheduler.add_job(
+ func=self._clean_plugin_cache,
+ trigger=self.trigger,
+ name=f"{self.JOBNAME}_scheduler",
+ max_instances=1,
+ )
+ except Exception as e:
+ logger.error(f"[{self.JOBNAME}] Cron 格式错误:{e}")
+
+ async def _clean_plugin_cache(self) -> None:
+ """真正的清理逻辑。"""
+ try:
+ files = [f for f in self.cache_dir.iterdir() if f.is_file()]
+ if not files:
+ logger.info("No cache files to clean.")
+ return
+
+ await asyncio.gather(*(safe_unlink(f) for f in files))
+ logger.info(f"Successfully cleaned {len(files)} cache files.")
+ except Exception:
+ logger.exception("Error while cleaning cache files.")
+
+ async def stop(self):
+ self.scheduler.remove_all_jobs()
+ logger.info(f"[{self.JOBNAME}] 已停止")
diff --git a/core/constants.py b/core/constants.py
new file mode 100644
index 0000000..13574bb
--- /dev/null
+++ b/core/constants.py
@@ -0,0 +1,39 @@
+from enum import Enum
+from typing import Final
+
+COMMON_HEADER: Final[dict[str, str]] = {
+ "User-Agent": (
+ "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36"
+ )
+}
+
+IOS_HEADER: Final[dict[str, str]] = {
+ "User-Agent": (
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) "
+ "Version/16.6 Mobile/15E148 Safari/604.1 Edg/132.0.0.0"
+ )
+}
+
+ANDROID_HEADER: Final[dict[str, str]] = {
+ "User-Agent": (
+ "Mozilla/5.0 (Linux; Android 15; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/132.0.0.0 Mobile Safari/537.36 Edg/132.0.0.0"
+ )
+}
+
+
+class PlatformEnum(str, Enum):
+ ACFUN = "acfun"
+ BILIBILI = "bilibili"
+ DOUYIN = "douyin"
+ KUAISHOU = "kuaishou"
+ NGA = "nga"
+ TIKTOK = "tiktok"
+ TWITTER = "twitter"
+ WEIBO = "weibo"
+ XIAOHONGSHU = "xiaohongshu"
+ YOUTUBE = "youtube"
+
+ def __str__(self) -> str:
+ return self.value
diff --git a/core/download.py b/core/download.py
new file mode 100644
index 0000000..269821d
--- /dev/null
+++ b/core/download.py
@@ -0,0 +1,375 @@
+import asyncio
+from asyncio import Task, create_task
+from collections.abc import Callable, Coroutine
+from functools import wraps
+from pathlib import Path
+from typing import Any, ParamSpec, TypeVar
+
+import aiofiles
+import yt_dlp
+from aiohttp import ClientError, ClientSession, ClientTimeout
+from msgspec import Struct, convert
+from tqdm.asyncio import tqdm
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from .constants import COMMON_HEADER
+from .exception import (
+ DownloadException,
+ DurationLimitException,
+ ParseException,
+ SizeLimitException,
+ ZeroSizeException,
+)
+from .utils import LimitedSizeDict, generate_file_name, merge_av, safe_unlink
+
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+def auto_task(func: Callable[P, Coroutine[Any, Any, T]]) -> Callable[P, Task[T]]:
+ """装饰器:自动将异步函数调用转换为 Task, 完整保留类型提示"""
+
+ @wraps(func)
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> Task[T]:
+ coro = func(*args, **kwargs)
+ name = " | ".join(str(arg) for arg in args if isinstance(arg, str))
+ return create_task(coro, name=func.__name__ + " | " + name)
+
+ return wrapper
+
+
+class VideoInfo(Struct):
+ title: str
+ """标题"""
+ channel: str
+ """频道名称"""
+ uploader: str
+ """上传者 id"""
+ duration: int
+ """时长"""
+ timestamp: int
+ """发布时间戳"""
+ thumbnail: str
+ """封面图片"""
+ description: str
+ """简介"""
+ channel_id: str
+ """频道 id"""
+
+ @property
+ def author_name(self) -> str:
+ return f"{self.channel}@{self.uploader}"
+
+
+class Downloader:
+ """下载器,支持youtube-dlp 和 httpx 流式下载"""
+
+ def __init__(self, config: AstrBotConfig):
+ self.config = config
+ self.cache_dir = Path(config["cache_dir"])
+ self.proxy: str | None = self.config["proxy"] or None
+ self.max_duration: int = config["max_duration"]
+ self.max_size = self.config["max_size"]
+ self.headers: dict[str, str] = COMMON_HEADER.copy()
+ # 视频信息缓存
+ self.info_cache: LimitedSizeDict[str, VideoInfo] = LimitedSizeDict()
+ # 用于流式下载的客户端
+ self.client = ClientSession(
+ timeout=ClientTimeout(total=config["download_timeout"])
+ )
+ @auto_task
+ async def streamd(
+ self,
+ url: str,
+ *,
+ file_name: str | None = None,
+ ext_headers: dict[str, str] | None = None,
+ ) -> Path:
+ """download file by url with stream
+
+ Args:
+ url (str): url address
+ file_name (str | None): file name. Defaults to generate_file_name.
+ ext_headers (dict[str, str] | None): ext headers. Defaults to None.
+
+ Returns:
+ Path: file path
+
+ Raises:
+ httpx.HTTPError: When download fails
+ """
+
+ if not file_name:
+ file_name = generate_file_name(url)
+ file_path = self.cache_dir / file_name
+ # 如果文件存在,则直接返回
+ if file_path.exists():
+ return file_path
+
+ headers = {**self.headers, **(ext_headers or {})}
+
+ try:
+ async with self.client.get(
+ url, headers=headers, allow_redirects=True
+ ) as response:
+ if response.status >= 400:
+ raise ClientError(
+ f"HTTP {response.status} {response.reason}"
+ )
+ content_length = response.headers.get("Content-Length")
+ content_length = int(content_length) if content_length else 0
+
+ if content_length == 0:
+ logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载")
+ raise ZeroSizeException
+ if (file_size := content_length / 1024 / 1024) > self.max_size:
+ logger.warning(
+ f"媒体 url: {url} 大小 {file_size:.2f} MB 超过 {self.max_size} MB, 取消下载"
+ )
+ raise SizeLimitException
+
+ with self.get_progress_bar(file_name, content_length) as bar:
+ async with aiofiles.open(file_path, "wb") as file:
+ async for chunk in response.content.iter_chunked(1024 * 1024):
+ await file.write(chunk)
+ bar.update(len(chunk))
+
+ except ClientError:
+ await safe_unlink(file_path)
+ logger.exception(f"下载失败 | url: {url}, file_path: {file_path}")
+ raise DownloadException("媒体下载失败")
+ return file_path
+
+ @staticmethod
+ def get_progress_bar(desc: str, total: int | None = None) -> tqdm:
+ """获取进度条 bar
+
+ Args:
+ desc (str): 描述
+ total (int | None): 总大小. Defaults to None.
+
+ Returns:
+ tqdm: 进度条
+ """
+ return tqdm(
+ total=total,
+ unit="B",
+ unit_scale=True,
+ unit_divisor=1024,
+ dynamic_ncols=True,
+ colour="green",
+ desc=desc,
+ )
+
+ @auto_task
+ async def download_video(
+ self,
+ url: str,
+ *,
+ video_name: str | None = None,
+ ext_headers: dict[str, str] | None = None,
+ use_ytdlp: bool = False,
+ cookiefile: Path | None = None,
+ ) -> Path:
+ """download video file by url with stream
+
+ Args:
+ url (str): url address
+ video_name (str | None): video name. Defaults to get name by parse url.
+ ext_headers (dict[str, str] | None): ext headers. Defaults to None.
+ use_ytdlp (bool): use ytdlp to download video. Defaults to False.
+ cookiefile (Path | None): cookie file path. Defaults to None.
+
+ Returns:
+ Path: video file path
+
+ Raises:
+ httpx.HTTPError: When download fails
+ """
+ if use_ytdlp:
+ return await self._ytdlp_download_video(url, cookiefile)
+
+ if video_name is None:
+ video_name = generate_file_name(url, ".mp4")
+ return await self.streamd(url, file_name=video_name, ext_headers=ext_headers)
+
+ @auto_task
+ async def download_audio(
+ self,
+ url: str,
+ *,
+ audio_name: str | None = None,
+ ext_headers: dict[str, str] | None = None,
+ use_ytdlp: bool = False,
+ cookiefile: Path | None = None,
+ ) -> Path:
+ """download audio file by url with stream
+
+ Args:
+ url (str): url address
+ audio_name (str | None ): audio name. Defaults to generate from url.
+ ext_headers (dict[str, str] | None): ext headers. Defaults to None.
+
+ Returns:
+ Path: audio file path
+
+ Raises:
+ httpx.HTTPError: When download fails
+ """
+ if use_ytdlp:
+ return await self._ytdlp_download_audio(url, cookiefile)
+
+ if audio_name is None:
+ audio_name = generate_file_name(url, ".mp3")
+ return await self.streamd(url, file_name=audio_name, ext_headers=ext_headers)
+
+ @auto_task
+ async def download_img(
+ self,
+ url: str,
+ *,
+ img_name: str | None = None,
+ ext_headers: dict[str, str] | None = None,
+ ) -> Path:
+ """download image file by url with stream
+
+ Args:
+ url (str): url
+ img_name (str | None): image name. Defaults to generate from url.
+ ext_headers (dict[str, str] | None): ext headers. Defaults to None.
+
+ Returns:
+ Path: image file path
+
+ Raises:
+ httpx.HTTPError: When download fails
+ """
+ if img_name is None:
+ img_name = generate_file_name(url, ".jpg")
+ return await self.streamd(url, file_name=img_name, ext_headers=ext_headers)
+
+ async def download_imgs_without_raise(
+ self,
+ urls: list[str],
+ *,
+ ext_headers: dict[str, str] | None = None,
+ ) -> list[Path]:
+ """download images without raise
+
+ Args:
+ urls (list[str]): urls
+ ext_headers (dict[str, str] | None): ext headers. Defaults to None.
+
+ Returns:
+ list[Path]: image file paths
+ """
+ paths_or_errs = await asyncio.gather(
+ *[self.download_img(url, ext_headers=ext_headers) for url in urls],
+ return_exceptions=True,
+ )
+ return [p for p in paths_or_errs if isinstance(p, Path)]
+
+ @auto_task
+ async def download_av_and_merge(
+ self,
+ v_url: str,
+ a_url: str,
+ *,
+ output_path: Path,
+ ext_headers: dict[str, str] | None = None,
+ ) -> Path:
+ """download video and audio file by url with stream and merge"""
+ v_path, a_path = await asyncio.gather(
+ self.download_video(v_url, ext_headers=ext_headers),
+ self.download_audio(a_url, ext_headers=ext_headers),
+ )
+ await merge_av(v_path=v_path, a_path=a_path, output_path=output_path)
+ return output_path
+
+ # region -------------------- 私有:yt-dlp --------------------
+
+ async def ytdlp_extract_info(
+ self, url: str, cookiefile: Path | None = None
+ ) -> VideoInfo:
+ if (info := self.info_cache.get(url)) is not None:
+ return info
+ opts = {
+ "quiet": True,
+ "skip_download": True,
+ "force_generic_extractor": True,
+ "cookiefile": None,
+ }
+ if self.proxy:
+ opts["proxy"] = self.proxy
+ if cookiefile and cookiefile.is_file():
+ opts["cookiefile"] = str(cookiefile)
+ with yt_dlp.YoutubeDL(opts) as ydl:
+ raw = await asyncio.to_thread(ydl.extract_info, url, download=False)
+ if not raw:
+ raise ParseException("获取视频信息失败")
+ info = convert(raw, VideoInfo)
+ self.info_cache[url] = info
+ return info
+
+ async def _ytdlp_download_video(
+ self, url: str, cookiefile: Path | None = None
+ ) -> Path:
+ info = await self.ytdlp_extract_info(url, cookiefile)
+ if info.duration > self.max_duration:
+ raise DurationLimitException
+
+ video_path = self.cache_dir / generate_file_name(url, ".mp4")
+ if video_path.exists():
+ return video_path
+
+ opts = {
+ "outtmpl": str(video_path),
+ "merge_output_format": "mp4",
+ # "format": f"bv[filesize<={info.duration // 10 + 10}M]+ba/b[filesize<={info.duration // 8 + 10}M]",
+ "format": "best[height<=720]/bestvideo[height<=720]+bestaudio/best",
+ "postprocessors": [
+ {"key": "FFmpegVideoConvertor", "preferedformat": "mp4"}
+ ],
+ "cookiefile": None,
+ }
+ if self.proxy:
+ opts["proxy"] = self.proxy
+ if cookiefile and cookiefile.is_file():
+ opts["cookiefile"] = str(cookiefile)
+
+ with yt_dlp.YoutubeDL(opts) as ydl:
+ await asyncio.to_thread(ydl.download, [url])
+ return video_path
+
+ async def _ytdlp_download_audio(self, url: str, cookiefile: Path | None) -> Path:
+ file_name = generate_file_name(url)
+ audio_path = self.cache_dir / f"{file_name}.flac"
+ if audio_path.exists():
+ return audio_path
+
+ opts = {
+ "outtmpl": str(self.cache_dir / file_name) + ".%(ext)s",
+ "format": "bestaudio/best",
+ "postprocessors": [
+ {
+ "key": "FFmpegExtractAudio",
+ "preferredcodec": "flac",
+ "preferredquality": "0",
+ }
+ ],
+ "cookiefile": None,
+ }
+ if self.proxy:
+ opts["proxy"] = self.proxy
+ if cookiefile and cookiefile.is_file():
+ opts["cookiefile"] = str(cookiefile)
+
+ with yt_dlp.YoutubeDL(opts) as ydl:
+ await asyncio.to_thread(ydl.download, [url])
+ return audio_path
+
+ async def close(self):
+ """关闭网络客户端"""
+ await self.client.close()
diff --git a/core/exception.py b/core/exception.py
new file mode 100644
index 0000000..31a9549
--- /dev/null
+++ b/core/exception.py
@@ -0,0 +1,46 @@
+class ParseException(Exception):
+ """异常基类"""
+
+ def __init__(self, message: str):
+ super().__init__(message)
+ self.message = message
+
+
+class TipException(ParseException):
+ """提示异常"""
+
+ pass
+
+
+class DownloadException(ParseException):
+ """下载异常"""
+
+ def __init__(self, message: str | None = None):
+ super().__init__(message or "媒体下载失败")
+
+
+class DownloadLimitException(DownloadException):
+ """下载超过限制异常"""
+
+ pass
+
+
+class SizeLimitException(DownloadLimitException):
+ """下载大小超过限制异常"""
+
+ def __init__(self):
+ super().__init__("媒体大小超过配置限制,取消下载")
+
+
+class DurationLimitException(DownloadLimitException):
+ """下载时长超过限制异常"""
+
+ def __init__(self):
+ super().__init__("媒体时长超过配置限制,取消下载")
+
+
+class ZeroSizeException(DownloadException):
+ """下载大小为 0 异常"""
+
+ def __init__(self):
+ super().__init__("媒体大小为 0, 取消下载")
diff --git a/core/parsers/__init__.py b/core/parsers/__init__.py
new file mode 100644
index 0000000..cdb3ceb
--- /dev/null
+++ b/core/parsers/__init__.py
@@ -0,0 +1,48 @@
+
+from .acfun import AcfunParser
+from .base import BaseParser, handle
+from .bilibili import BilibiliParser
+from .data import (
+ AudioContent,
+ Author,
+ DynamicContent,
+ GraphicsContent,
+ ImageContent,
+ ParseResult,
+ Platform,
+ VideoContent,
+)
+from .douyin import DouyinParser
+from .kuaishou import KuaiShouParser
+from .nga import NGAParser
+from .tiktok import TikTokParser
+from .twitter import TwitterParser
+from .weibo import WeiBoParser
+from .xiaohongshu import XiaoHongShuParser
+from .youtube import YouTubeParser
+
+__all__ = [
+ # 数据模型
+ "AudioContent",
+ "Author",
+ "DynamicContent",
+ "GraphicsContent",
+ "ImageContent",
+ "ParseResult",
+ "Platform",
+ "VideoContent",
+ # 基础组件
+ "BaseParser",
+ "handle",
+ # 各平台 Parser
+ "AcfunParser",
+ "BilibiliParser",
+ "DouyinParser",
+ "KuaiShouParser",
+ "NGAParser",
+ "TikTokParser",
+ "TwitterParser",
+ "WeiBoParser",
+ "XiaoHongShuParser",
+ "YouTubeParser",
+]
diff --git a/core/parsers/acfun.py b/core/parsers/acfun.py
new file mode 100644
index 0000000..fb0ff82
--- /dev/null
+++ b/core/parsers/acfun.py
@@ -0,0 +1,161 @@
+import asyncio
+import json
+import re
+import time
+from pathlib import Path
+from typing import ClassVar
+
+import aiofiles
+from aiohttp import ClientError
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from ..exception import DownloadException, ParseException
+from ..utils import safe_unlink
+from .base import BaseParser, Platform, PlatformEnum, handle
+
+
+class AcfunParser(BaseParser):
+ # 平台信息
+ platform: ClassVar[Platform] = Platform(name=PlatformEnum.ACFUN, display_name="A站")
+
+ def __init__(self, config: AstrBotConfig, downloader: Downloader):
+ super().__init__(config, downloader)
+ self.headers["referer"] = "https://www.acfun.cn/"
+ self.cache_dir = Path(config["cache_dir"])
+ self.max_size = self.config["max_size"]
+
+    @handle("acfun.cn", r"(?:ac=|/ac)(?P<acid>\d+)")
+ async def _parse(self, searched: re.Match[str]):
+ acid = int(searched.group("acid"))
+ url = f"https://www.acfun.cn/v/ac{acid}"
+
+ m3u8_url, title, description, author, upload_time = await self.parse_video_info(url)
+ author = self.create_author(author) if author else None
+
+ # 2024-12-1 -> timestamp
+ try:
+ timestamp = int(time.mktime(time.strptime(upload_time, "%Y-%m-%d")))
+ except ValueError:
+ timestamp = None
+ text = f"简介: {description}"
+
+ # 下载视频
+ video_task = asyncio.create_task(self.download_video(m3u8_url, acid))
+
+ return self.result(
+ title=title,
+ text=text,
+ author=author,
+ timestamp=timestamp,
+ contents=[self.create_video_content(video_task)],
+ )
+
+ async def parse_video_info(self, url: str) -> tuple[str, str, str, str, str]:
+ """解析acfun链接获取详细信息
+
+ Args:
+ url (str): 链接
+
+ Returns:
+ tuple: (m3u8_url, title, description, author, upload_time)
+ """
+
+ # 拼接查询参数
+ url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
+
+ async with self.client.get(url, headers=self.headers) as resp:
+ if resp.status >= 400:
+ raise ClientError(f"HTTP {resp.status}")
+ raw = await resp.text()
+
+        matched = re.search(r"window\.videoInfo =(.*?)</script>", raw)
+ if not matched:
+ raise ParseException("解析 acfun 视频信息失败")
+ json_str = str(matched.group(1))
+ json_str = json_str.replace('\\\\"', '\\"').replace('\\"', '"')
+ video_info = json.loads(json_str)
+
+ title = video_info.get("title", "")
+ description = video_info.get("description", "")
+ author = video_info.get("user", {}).get("name", "")
+ upload_time = video_info.get("createTime", "")
+
+ ks_play_json = video_info["currentVideoInfo"]["ksPlayJson"]
+ ks_play = json.loads(ks_play_json)
+ representations = ks_play["adaptationSet"][0]["representation"]
+ # 这里[d['url'] for d in representations],从 4k ~ 360,此处默认720p
+ m3u8_url = [d["url"] for d in representations][3]
+
+ return m3u8_url, title, description, author, upload_time
+
+ async def download_video(self, m3u8s_url: str, acid: int) -> Path:
+ """下载acfun视频
+
+ Args:
+ m3u8s_url (str): m3u8链接
+ acid (int): acid
+
+ Returns:
+ Path: 下载的mp4文件
+ """
+
+ m3u8_full_urls = await self._parse_m3u8(m3u8s_url)
+ video_file = self.cache_dir / f"acfun_{acid}.mp4"
+ if video_file.exists():
+ return video_file
+
+ max_size = self.max_size * 1024 * 1024
+
+ try:
+ async with aiofiles.open(video_file, "wb") as f:
+ with self.downloader.get_progress_bar(video_file.name) as bar:
+ total = 0
+ for url in m3u8_full_urls:
+ async with self.client.get(url, headers=self.headers) as resp:
+ if resp.status >= 400:
+ raise ClientError(f"{resp.status} {resp.reason}")
+ async for chunk in resp.content.iter_chunked(1024 * 1024):
+ await f.write(chunk)
+ total += len(chunk)
+ bar.update(len(chunk))
+ if total > max_size: # 大小截断
+ break
+ if total > max_size:
+ break
+
+ except ClientError:
+ await safe_unlink(video_file)
+ logger.exception("视频下载失败")
+ raise DownloadException("视频下载失败")
+ return video_file
+
+ async def _parse_m3u8(self, m3u8_url: str):
+ """解析m3u8链接
+
+ Args:
+ m3u8_url (str): m3u8链接
+
+ Returns:
+ list[str]: 视频链接
+ """
+ async with self.client.get(m3u8_url, headers=self.headers) as resp:
+ if resp.status >= 400:
+ raise ClientError(f"{resp.status} {resp.reason}")
+ m3u8_file = await resp.text()
+ # 分离ts文件链接
+ raw_pieces = re.split(r"\n#EXTINF:.{8},\n", m3u8_file)
+ # 过滤头部\
+ m3u8_relative_links = raw_pieces[1:]
+
+ # 修改尾部 去掉尾部多余的结束符
+ patched_tail = m3u8_relative_links[-1].split("\n")[0]
+ m3u8_relative_links[-1] = patched_tail
+
+ # 完整链接,直接加 m3u8Url 的通用前缀
+ m3u8_prefix = "/".join(m3u8_url.split("/")[0:-1])
+ m3u8_full_urls = [f"{m3u8_prefix}/{d}" for d in m3u8_relative_links]
+
+ return m3u8_full_urls
diff --git a/core/parsers/base.py b/core/parsers/base.py
new file mode 100644
index 0000000..3a6d9a6
--- /dev/null
+++ b/core/parsers/base.py
@@ -0,0 +1,287 @@
+"""Parser 基类定义"""
+
+from abc import ABC
+from asyncio import Task
+from collections.abc import Callable, Coroutine
+from pathlib import Path
+from re import Match, Pattern, compile
+from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast
+
+from aiohttp import ClientError, ClientSession, ClientTimeout, TCPConnector
+from typing_extensions import Unpack
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..constants import ANDROID_HEADER, COMMON_HEADER, IOS_HEADER
+from ..constants import PlatformEnum as PlatformEnum
+from ..download import Downloader
+from ..exception import DownloadException as DownloadException
+from ..exception import DurationLimitException as DurationLimitException
+from ..exception import ParseException as ParseException
+from ..exception import SizeLimitException as SizeLimitException
+from ..exception import TipException as TipException
+from ..exception import ZeroSizeException as ZeroSizeException
+from .data import ParseResult, ParseResultKwargs, Platform
+
+T = TypeVar("T", bound="BaseParser")
+HandlerFunc = Callable[[T, Match[str]], Coroutine[Any, Any, ParseResult]]
+KeyPatterns = list[tuple[str, Pattern[str]]]
+
+_KEY_PATTERNS = "_key_patterns"
+
+
+# 注册处理器装饰器
+def handle(keyword: str, pattern: str):
+ """注册处理器装饰器"""
+
+ def decorator(func: HandlerFunc[T]) -> HandlerFunc[T]:
+ if not hasattr(func, _KEY_PATTERNS):
+ setattr(func, _KEY_PATTERNS, [])
+
+ key_patterns: KeyPatterns = getattr(func, _KEY_PATTERNS)
+ key_patterns.append((keyword, compile(pattern)))
+
+ return func
+
+ return decorator
+
+
+class BaseParser:
+ """所有平台 Parser 的抽象基类
+
+ 子类必须实现:
+ - platform: 平台信息(包含名称和显示名称)
+ """
+
+ _registry: ClassVar[list[type["BaseParser"]]] = []
+ """ 存储所有已注册的 Parser 类 """
+
+ platform: ClassVar[Platform]
+ """ 平台信息(包含名称和显示名称) """
+
+ _session: ClassVar[ClientSession | None] = None
+ """ 全局 ClientSession 对象 """
+
+ if TYPE_CHECKING:
+ _key_patterns: ClassVar[KeyPatterns]
+ _handlers: ClassVar[dict[str, HandlerFunc]]
+
+ def __init__(
+ self,
+ config: AstrBotConfig,
+ downloader: Downloader,
+ ):
+ self.headers = COMMON_HEADER.copy()
+ self.ios_headers = IOS_HEADER.copy()
+ self.android_headers = ANDROID_HEADER.copy()
+ self.config = config
+ self.downloader = downloader
+ self.client = self.get_session(config["common_timeout"])
+
+ def __init_subclass__(cls, **kwargs):
+ """自动注册子类到 _registry"""
+ super().__init_subclass__(**kwargs)
+ if ABC not in cls.__bases__: # 跳过抽象类
+ BaseParser._registry.append(cls)
+
+ cls._handlers = {}
+ cls._key_patterns = []
+
+ # 获取所有被 handle 装饰的方法
+ for attr_name in dir(cls):
+ attr = getattr(cls, attr_name)
+ if callable(attr) and hasattr(attr, _KEY_PATTERNS):
+ key_patterns: KeyPatterns = getattr(attr, _KEY_PATTERNS)
+ handler = cast(HandlerFunc, attr)
+ for keyword, pattern in key_patterns:
+ cls._handlers[keyword] = handler
+ cls._key_patterns.append((keyword, pattern))
+
+ # 按关键字长度降序排序
+ cls._key_patterns.sort(key=lambda x: -len(x[0]))
+
+ @classmethod
+ def get_all_subclass(cls) -> list[type["BaseParser"]]:
+ """获取所有已注册的 Parser 类"""
+ return cls._registry
+
+ @classmethod
+ def get_session(cls, timeout: float = 30) -> ClientSession:
+ """取全局单例,首次调用时创建"""
+ if cls._session is None or cls._session.closed:
+ cls._session = ClientSession(
+ connector=TCPConnector(ssl=False),
+ timeout=ClientTimeout(total=timeout),
+ )
+ return cls._session
+
+ @classmethod
+ async def close_session(cls) -> None:
+ """关闭全局单例,插件卸载时调用一次即可"""
+ if cls._session and not cls._session.closed:
+ await cls._session.close()
+ cls._session = None
+
+ async def parse(self, keyword: str, searched: Match[str]) -> ParseResult:
+ """解析 URL 提取信息
+
+ Args:
+ keyword: 关键词
+ searched: 正则表达式匹配对象,由平台对应的模式匹配得到
+
+ Returns:
+ ParseResult: 解析结果
+
+ Raises:
+ ParseException: 解析失败时抛出
+ """
+ return await self._handlers[keyword](self, searched)
+
+ async def parse_with_redirect(
+ self,
+ url: str,
+ headers: dict[str, str] | None = None,
+ ) -> ParseResult:
+ """先重定向再解析"""
+ redirect_url = await self.get_redirect_url(url, headers=headers or self.headers)
+
+ if redirect_url == url:
+ raise ParseException(f"无法重定向 URL: {url}")
+
+ keyword, searched = self.search_url(redirect_url)
+ return await self.parse(keyword, searched)
+
+ @classmethod
+ def search_url(cls, url: str) -> tuple[str, Match[str]]:
+ """搜索 URL 匹配模式"""
+ for keyword, pattern in cls._key_patterns:
+ if keyword not in url:
+ continue
+ if searched := pattern.search(url):
+ return keyword, searched
+ raise ParseException(f"无法匹配 {url}")
+
+ @classmethod
+ def result(cls, **kwargs: Unpack[ParseResultKwargs]) -> ParseResult:
+ """构建解析结果"""
+ return ParseResult(platform=cls.platform, **kwargs)
+
+
+ async def get_redirect_url(
+ self,
+ url: str,
+ headers: dict[str, str] | None = None,
+ ) -> str:
+ """获取重定向后的 URL, 单次重定向"""
+
+ headers = headers or COMMON_HEADER.copy()
+ async with self.client.get(url, headers=headers, allow_redirects=False) as resp:
+ if resp.status >= 400:
+ raise ClientError(f"redirect check {resp.status} {resp.reason}")
+ return resp.headers.get("Location", url)
+
+ async def get_final_url(
+ self,
+ url: str,
+ headers: dict[str, str] | None = None,
+ ) -> str:
+ """获取重定向后的 URL, 允许多次重定向"""
+ headers = headers or COMMON_HEADER.copy()
+ async with self.client.get(
+ url, headers=headers, allow_redirects=True
+ ) as resp:
+ if resp.status >= 400:
+ raise ClientError(f"final url check {resp.status} {resp.reason}")
+ return str(resp.url)
+
+ def create_author(
+ self,
+ name: str,
+ avatar_url: str | None = None,
+ description: str | None = None,
+ ):
+ """创建作者对象"""
+ from .data import Author
+
+ avatar_task = None
+ if avatar_url:
+ avatar_task = self.downloader.download_img(
+ avatar_url, ext_headers=self.headers
+ )
+ return Author(name=name, avatar=avatar_task, description=description)
+
+ def create_video_content(
+ self,
+ url_or_task: str | Task[Path],
+ cover_url: str | None = None,
+ duration: float = 0.0,
+ ):
+ """创建视频内容"""
+ from .data import VideoContent
+
+ cover_task = None
+ if cover_url:
+ cover_task = self.downloader.download_img(
+ cover_url, ext_headers=self.headers
+ )
+ if isinstance(url_or_task, str):
+ url_or_task = self.downloader.download_video(
+ url_or_task, ext_headers=self.headers
+ )
+
+ return VideoContent(url_or_task, cover_task, duration)
+
+ def create_image_contents(
+ self,
+ image_urls: list[str],
+ ):
+ """创建图片内容列表"""
+ from .data import ImageContent
+
+ contents: list[ImageContent] = []
+ for url in image_urls:
+ task = self.downloader.download_img(url, ext_headers=self.headers)
+ contents.append(ImageContent(task))
+ return contents
+
+ def create_dynamic_contents(
+ self,
+ dynamic_urls: list[str],
+ ):
+ """创建动态图片内容列表"""
+ from .data import DynamicContent
+
+ contents: list[DynamicContent] = []
+ for url in dynamic_urls:
+ task = self.downloader.download_video(url, ext_headers=self.headers)
+ contents.append(DynamicContent(task))
+ return contents
+
+ def create_audio_content(
+ self,
+ url_or_task: str | Task[Path],
+ duration: float = 0.0,
+ ):
+ """创建音频内容"""
+ from .data import AudioContent
+
+ if isinstance(url_or_task, str):
+ url_or_task = self.downloader.download_audio(
+ url_or_task, ext_headers=self.headers
+ )
+
+ return AudioContent(url_or_task, duration)
+
+ def create_graphics_content(
+ self,
+ image_url: str,
+ text: str | None = None,
+ alt: str | None = None,
+ ):
+ """创建图文内容 图片不能为空 文字可空 渲染时文字在前 图片在后"""
+ from .data import GraphicsContent
+
+ image_task = self.downloader.download_img(image_url, ext_headers=self.headers)
+ return GraphicsContent(image_task, text, alt)
+
+
diff --git a/core/parsers/bilibili/__init__.py b/core/parsers/bilibili/__init__.py
new file mode 100644
index 0000000..44c55e0
--- /dev/null
+++ b/core/parsers/bilibili/__init__.py
@@ -0,0 +1,524 @@
+import asyncio
+import json
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from re import Match
+from typing import ClassVar
+
+from bilibili_api import HEADERS, Credential, request_settings, select_client
+from bilibili_api.login_v2 import QrCodeLogin, QrCodeLoginEvents
+from bilibili_api.opus import Opus
+from bilibili_api.video import Video, VideoCodecs, VideoQuality
+from msgspec import convert
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ...utils import ck2dict
+from ..base import (
+ BaseParser,
+ Downloader,
+ DownloadException,
+ DurationLimitException,
+ ParseException,
+ PlatformEnum,
+ handle,
+)
+from ..data import ImageContent, MediaContent, Platform
+
# Select the HTTP client backend used by bilibili_api.
select_client("curl_cffi")
# Impersonate a browser; the second argument is a curl_cffi impersonation
# target, see https://curl-cffi.readthedocs.io/en/latest/impersonate.html
request_settings.set("impersonate", "chrome131")
+
+
+class BilibiliParser(BaseParser):
+ # 平台信息
+ platform: ClassVar[Platform] = Platform(name=PlatformEnum.BILIBILI, display_name="B站")
+
+ def __init__(self, config: AstrBotConfig, downloader: Downloader):
+ super().__init__(config, downloader)
+ self.headers = HEADERS.copy()
+ self._credential: Credential | None = None
+ self.max_duration = config["max_duration"]
+ self.cache_dir = Path(config["cache_dir"])
+
+ self.video_quality = getattr(
+ VideoQuality, config["bili_video_quality"].upper(), VideoQuality._720P
+ )
+ self.codecs = getattr(
+ VideoCodecs, config["bili_video_codecs"].upper(), VideoCodecs.AVC
+ )
+ self.bili_ck = config["bili_ck"]
+ self._cookies_file = Path(config["data_dir"]) / "bilibili_cookies.json"
+
+ @handle("b23.tv", r"b23\.tv/[A-Za-z\d\._?%&+\-=/#]+")
+ @handle("bili2233", r"bili2233\.cn/[A-Za-z\d\._?%&+\-=/#]+")
+ async def _parse_short_link(self, searched: Match[str]):
+ """解析短链"""
+ url = f"https://{searched.group(0)}"
+ return await self.parse_with_redirect(url)
+
+ @handle("BV", r"^(?PBV[0-9a-zA-Z]{10})(?:\s)?(?P\d{1,3})?$")
+ @handle("/BV", r"bilibili\.com(?:/video)?/(?PBV[0-9a-zA-Z]{10})(?:\?p=(?P\d{1,3}))?")
+ async def _parse_bv(self, searched: Match[str]):
+ """解析视频信息"""
+ bvid = str(searched.group("bvid"))
+ page_num = int(searched.group("page_num") or 1)
+
+ return await self.parse_video(bvid=bvid, page_num=page_num)
+
+ @handle("av", r"^av(?P\d{6,})(?:\s)?(?P\d{1,3})?$")
+ @handle("/av", r"bilibili\.com(?:/video)?/av(?P\d{6,})(?:\?p=(?P\d{1,3}))?")
+ async def _parse_av(self, searched: Match[str]):
+ """解析视频信息"""
+ avid = int(searched.group("avid"))
+ page_num = int(searched.group("page_num") or 1)
+
+ return await self.parse_video(avid=avid, page_num=page_num)
+
+ @handle("/dynamic/", r"bilibili\.com/dynamic/(?P\d+)")
+ @handle("t.bili", r"t\.bilibili\.com/(?P\d+)")
+ async def _parse_dynamic(self, searched: Match[str]):
+ """解析动态信息"""
+ dynamic_id = int(searched.group("dynamic_id"))
+ return await self.parse_dynamic(dynamic_id)
+
+ @handle("live.bili", r"live\.bilibili\.com/(?P\d+)")
+ async def _parse_live(self, searched: Match[str]):
+ """解析直播信息"""
+ room_id = int(searched.group("room_id"))
+ return await self.parse_live(room_id)
+
+ @handle("/favlist", r"favlist\?fid=(?P\d+)")
+ async def _parse_favlist(self, searched: Match[str]):
+ """解析收藏夹信息"""
+ fav_id = int(searched.group("fav_id"))
+ return await self.parse_favlist(fav_id)
+
+ @handle("/read/", r"bilibili\.com/read/cv(?P\d+)")
+ async def _parse_read(self, searched: Match[str]):
+ """解析专栏信息"""
+ read_id = int(searched.group("read_id"))
+ return await self.parse_read(read_id)
+
+ @handle("/opus/", r"bilibili\.com/opus/(?P\d+)")
+ async def _parse_opus(self, searched: Match[str]):
+ """解析图文动态信息"""
+ opus_id = int(searched.group("opus_id"))
+ return await self.parse_opus(opus_id)
+
+ async def parse_video(
+ self,
+ *,
+ bvid: str | None = None,
+ avid: int | None = None,
+ page_num: int = 1,
+ ):
+ """解析视频信息
+
+ Args:
+ bvid (str | None): bvid
+ avid (int | None): avid
+ page_num (int): 页码
+ """
+
+ from .video import AIConclusion, VideoInfo
+
+ video = await self._get_video(bvid=bvid, avid=avid)
+ # 转换为 msgspec struct
+ video_info = convert(await video.get_info(), VideoInfo)
+ # 获取简介
+ text = f"简介: {video_info.desc}" if video_info.desc else None
+ # up
+ author = self.create_author(video_info.owner.name, video_info.owner.face)
+ # 处理分 p
+ page_info = video_info.extract_info_with_page(page_num)
+
+ # 获取 AI 总结
+ if self._credential:
+ cid = await video.get_cid(page_info.index)
+ ai_conclusion = await video.get_ai_conclusion(cid)
+ ai_conclusion = convert(ai_conclusion, AIConclusion)
+ ai_summary = ai_conclusion.summary
+ else:
+ ai_summary: str = "哔哩哔哩 cookie 未配置或失效, 无法使用 AI 总结"
+
+ url = f"https://bilibili.com/{video_info.bvid}"
+ url += f"?p={page_info.index + 1}" if page_info.index > 0 else ""
+
+ # 视频下载 task
+ async def download_video():
+ output_path = self.cache_dir / f"{video_info.bvid}-{page_num}.mp4"
+ if output_path.exists():
+ return output_path
+ v_url, a_url = await self.extract_download_urls(video=video, page_index=page_info.index)
+ if page_info.duration > self.max_duration:
+ raise DurationLimitException
+ if a_url is not None:
+ return await self.downloader.download_av_and_merge(
+ v_url, a_url, output_path=output_path, ext_headers=self.headers
+ )
+ else:
+ return await self.downloader.streamd(
+ v_url, file_name=output_path.name, ext_headers=self.headers
+ )
+
+ video_task = asyncio.create_task(download_video())
+ video_content = self.create_video_content(
+ video_task,
+ page_info.cover,
+ page_info.duration,
+ )
+
+ return self.result(
+ url=url,
+ title=page_info.title,
+ timestamp=page_info.timestamp,
+ text=text,
+ author=author,
+ contents=[video_content],
+ extra={"info": ai_summary},
+ )
+
+ async def parse_dynamic(self, dynamic_id: int):
+ """解析动态信息
+
+ Args:
+ url (str): 动态链接
+ """
+ from bilibili_api.dynamic import Dynamic
+
+ from .dynamic import DynamicItem
+
+ dynamic = Dynamic(dynamic_id, await self.credential)
+
+ # 转换为结构体
+ dynamic_data = convert(await dynamic.get_info(), DynamicItem)
+ dynamic_info = dynamic_data.item
+ # 使用结构体属性提取信息
+ author = self.create_author(dynamic_info.name, dynamic_info.avatar)
+
+ # 下载图片
+ contents: list[MediaContent] = []
+ for image_url in dynamic_info.image_urls:
+ img_task = self.downloader.download_img(image_url, ext_headers=self.headers)
+ contents.append(ImageContent(img_task))
+
+ return self.result(
+ title=dynamic_info.title,
+ text=dynamic_info.text,
+ timestamp=dynamic_info.timestamp,
+ author=author,
+ contents=contents,
+ )
+
+ async def parse_opus(self, opus_id: int):
+ """解析图文动态信息
+
+ Args:
+ opus_id (int): 图文动态 id
+ """
+ opus = Opus(opus_id, await self.credential)
+ return await self._parse_opus_obj(opus)
+
+ async def parse_read_old(self, read_id: int):
+ """解析专栏信息, 已废弃
+
+ Args:
+ read_id (int): 专栏 id
+ """
+ from bilibili_api.article import Article
+
+ article = Article(read_id)
+ return await self._parse_opus_obj(await article.turn_to_opus())
+
+ async def _parse_opus_obj(self, bili_opus: Opus):
+ """解析图文动态信息
+
+ Args:
+ opus_id (int): 图文动态 id
+
+ Returns:
+ ParseResult: 解析结果
+ """
+
+ from .opus import ImageNode, OpusItem, TextNode
+
+ opus_info = await bili_opus.get_info()
+ if not isinstance(opus_info, dict):
+ raise ParseException("获取图文动态信息失败")
+ # 转换为结构体
+ opus_data = convert(opus_info, OpusItem)
+ logger.debug(f"opus_data: {opus_data}")
+ author = self.create_author(*opus_data.name_avatar)
+
+ # 按顺序处理图文内容(参考 parse_read 的逻辑)
+ contents: list[MediaContent] = []
+ current_text = ""
+
+ for node in opus_data.gen_text_img():
+ if isinstance(node, ImageNode):
+ contents.append(self.create_graphics_content(node.url, current_text.strip(), node.alt))
+ current_text = ""
+ elif isinstance(node, TextNode):
+ current_text += node.text
+
+ return self.result(
+ title=opus_data.title,
+ author=author,
+ timestamp=opus_data.timestamp,
+ contents=contents,
+ text=current_text.strip(),
+ )
+
+ async def parse_live(self, room_id: int):
+ """解析直播信息
+
+ Args:
+ room_id (int): 直播 id
+
+ Returns:
+ ParseResult: 解析结果
+ """
+ from bilibili_api.live import LiveRoom
+
+ from .live import RoomData
+
+ room = LiveRoom(room_display_id=room_id, credential=await self.credential)
+ info_dict = await room.get_room_info()
+
+ room_data = convert(info_dict, RoomData)
+ contents: list[MediaContent] = []
+ # 下载封面
+ if cover := room_data.cover:
+ cover_task = self.downloader.download_img(cover, ext_headers=self.headers)
+ contents.append(ImageContent(cover_task))
+
+ # 下载关键帧
+ if keyframe := room_data.keyframe:
+ keyframe_task = self.downloader.download_img(
+ keyframe, ext_headers=self.headers
+ )
+ contents.append(ImageContent(keyframe_task))
+
+ author = self.create_author(room_data.name, room_data.avatar)
+
+ url = f"https://www.bilibili.com/blackboard/live/live-activity-player.html?enterTheRoom=0&cid={room_id}"
+ return self.result(
+ url=url,
+ title=room_data.title,
+ text=room_data.detail,
+ contents=contents,
+ author=author,
+ )
+
+ async def parse_read(self, read_id: int):
+ """专栏解析
+
+ Args:
+ read_id (int): 专栏 id
+
+ Returns:
+ texts: list[str], urls: list[str]
+ """
+ from bilibili_api.article import Article
+
+ from .article import ArticleInfo, ImageNode, TextNode
+
+ ar = Article(read_id)
+ # 加载内容
+ await ar.fetch_content()
+ data = ar.json()
+ article_info = convert(data, ArticleInfo)
+ logger.debug(f"article_info: {article_info}")
+
+ contents: list[MediaContent] = []
+ current_text = ""
+ for child in article_info.gen_text_img():
+ if isinstance(child, ImageNode):
+ contents.append(self.create_graphics_content(child.url, current_text.strip(), child.alt))
+ current_text = ""
+ elif isinstance(child, TextNode):
+ current_text += child.text
+
+ author = self.create_author(*article_info.author_info)
+
+ return self.result(
+ title=article_info.title,
+ timestamp=article_info.timestamp,
+ text=current_text.strip(),
+ author=author,
+ contents=contents,
+ )
+
+ async def parse_favlist(self, fav_id: int):
+ """解析收藏夹信息
+
+ Args:
+ fav_id (int): 收藏夹 id
+
+ Returns:
+ list[GraphicsContent]: 图文内容列表
+ """
+ from bilibili_api.favorite_list import get_video_favorite_list_content
+
+ from .favlist import FavData
+
+ # 只会取一页,20 个
+ fav_dict = await get_video_favorite_list_content(fav_id)
+
+ if fav_dict["medias"] is None:
+ raise ParseException("收藏夹内容为空, 或被风控")
+
+ favdata = convert(fav_dict, FavData)
+
+ return self.result(
+ title=favdata.title,
+ timestamp=favdata.timestamp,
+ author=self.create_author(favdata.info.upper.name, favdata.info.upper.face),
+ contents=[self.create_graphics_content(fav.cover, fav.desc) for fav in favdata.medias],
+ )
+
+ async def _get_video(self, *, bvid: str | None = None, avid: int | None = None) -> Video:
+ """解析视频信息
+
+ Args:
+ bvid (str | None): bvid
+ avid (int | None): avid
+ """
+ if avid:
+ return Video(aid=avid, credential=await self.credential)
+ elif bvid:
+ return Video(bvid=bvid, credential=await self.credential)
+ else:
+ raise ParseException("avid 和 bvid 至少指定一项")
+
+ async def extract_download_urls(
+ self,
+ video: Video | None = None,
+ *,
+ bvid: str | None = None,
+ avid: int | None = None,
+ page_index: int = 0,
+ ) -> tuple[str, str | None]:
+ """解析视频下载链接
+
+ Args:
+ bvid (str | None): bvid
+ avid (int | None): avid
+ page_index (int): 页索引 = 页码 - 1
+ """
+
+ from bilibili_api.video import (
+ AudioStreamDownloadURL,
+ VideoDownloadURLDataDetecter,
+ VideoStreamDownloadURL,
+ )
+
+ if video is None:
+ video = await self._get_video(bvid=bvid, avid=avid)
+
+ # 获取下载数据
+ download_url_data = await video.get_download_url(page_index=page_index)
+ detecter = VideoDownloadURLDataDetecter(download_url_data)
+ streams = detecter.detect_best_streams(
+ video_max_quality=self.video_quality,
+ codecs=[self.codecs],
+ no_dolby_video=True,
+ no_hdr=True,
+ )
+ video_stream = streams[0]
+ if not isinstance(video_stream, VideoStreamDownloadURL):
+ raise DownloadException("未找到可下载的视频流")
+ logger.debug(f"视频流质量: {video_stream.video_quality.name}, 编码: {video_stream.video_codecs}")
+
+ audio_stream = streams[1]
+ if not isinstance(audio_stream, AudioStreamDownloadURL):
+ return video_stream.url, None
+ logger.debug(f"音频流质量: {audio_stream.audio_quality.name}")
+ return video_stream.url, audio_stream.url
+
+ def _save_credential(self):
+ """存储哔哩哔哩登录凭证"""
+ if self._credential is None:
+ return
+
+ self._cookies_file.write_text(json.dumps(self._credential.get_cookies()))
+
+ def _load_credential(self):
+ """从文件加载哔哩哔哩登录凭证"""
+ if not self._cookies_file.exists():
+ return
+
+ self._credential = Credential.from_cookies(json.loads(self._cookies_file.read_text()))
+
+ async def login_with_qrcode(self) -> bytes:
+ """通过二维码登录获取哔哩哔哩登录凭证"""
+ self._qr_login = QrCodeLogin()
+ await self._qr_login.generate_qrcode()
+
+ qr_pic = self._qr_login.get_qrcode_picture()
+ return qr_pic.content
+
+ async def check_qr_state(self) -> AsyncGenerator[str, None]:
+ """检查二维码登录状态"""
+ scan_tip_pending = True
+
+ for _ in range(30):
+ state = await self._qr_login.check_state()
+ match state:
+ case QrCodeLoginEvents.DONE:
+ yield "登录成功"
+ self._credential = self._qr_login.get_credential()
+ self._save_credential()
+ break
+ case QrCodeLoginEvents.CONF:
+ if scan_tip_pending:
+ yield "二维码已扫描, 请确认登录"
+ scan_tip_pending = False
+ case QrCodeLoginEvents.TIMEOUT:
+ yield "二维码过期, 请重新生成"
+ break
+ await asyncio.sleep(2)
+ else:
+ yield "二维码登录超时, 请重新生成"
+
+ async def _init_credential(self):
+ """初始化哔哩哔哩登录凭证"""
+ if not self.bili_ck:
+ self._load_credential()
+ return
+
+ credential = Credential.from_cookies(ck2dict(self.bili_ck))
+ if await credential.check_valid():
+ logger.info(f"`parser_bili_ck` 有效, 保存到 {self._cookies_file}")
+ self._credential = credential
+ self._save_credential()
+ else:
+ logger.info(f"`parser_bili_ck` 已过期, 尝试从 {self._cookies_file} 加载")
+ self._load_credential()
+
+ @property
+ async def credential(self) -> Credential | None:
+ """哔哩哔哩登录凭证"""
+
+ if self._credential is None:
+ await self._init_credential()
+ return self._credential
+
+ if not await self._credential.check_valid():
+ logger.warning("哔哩哔哩凭证已过期, 请重新配置")
+ return None
+
+ if await self._credential.check_refresh():
+ logger.info("哔哩哔哩凭证需要刷新")
+ if self._credential.has_ac_time_value() and self._credential.has_bili_jct():
+ await self._credential.refresh()
+ logger.info(f"哔哩哔哩凭证刷新成功, 保存到 {self._cookies_file}")
+ self._save_credential()
+ else:
+ logger.warning("哔哩哔哩凭证刷新需要包含 `SESSDATA`, `ac_time_value` 项")
+
+ return self._credential
diff --git a/core/parsers/bilibili/article.py b/core/parsers/bilibili/article.py
new file mode 100644
index 0000000..3126763
--- /dev/null
+++ b/core/parsers/bilibili/article.py
@@ -0,0 +1,118 @@
+"""Bilibili 专栏文章解析器"""
+
+from collections.abc import Generator
+from typing import Any
+
+from msgspec import Struct
+
+
class TextNode(Struct):
    """A plain-text fragment of an article body."""

    text: str  # text content
+
+
class ImageNode(Struct):
    """An inline image within an article body."""

    url: str  # image URL
    alt: str | None = None  # optional alt text / caption
+
+
class Author(Struct):
    """Article author metadata."""

    mid: int  # author's user id
    name: str
    face: str  # avatar URL
    fans: int  # follower count
    level: int
+
+
class Stats(Struct):
    """Article statistics counters."""

    view: int      # views
    favorite: int  # favorites
    like: int      # likes
    reply: int     # replies
    share: int     # shares
    coin: int      # coins
+
+
class Meta(Struct):
    """Article metadata block."""

    id: int  # article id
    title: str
    summary: str
    publish_time: int  # publish time; exposed via ArticleInfo.timestamp
    author: Author
    stats: Stats
    tags: list[dict[str, Any]]  # raw tag dicts; names extracted by ArticleInfo.tags
    words: int  # word count
+
+
class ArticleInfo(Struct):
    """Top-level article payload: metadata plus an ordered node tree."""

    type: str
    meta: Meta
    children: list[dict[str, Any]]

    def gen_text_img(self) -> Generator[TextNode | ImageNode, None, None]:
        """Yield text and image nodes in document order."""
        for node in self.children:
            node_type = node.get("type")
            if node_type == "ParagraphNode":
                # Flatten the paragraph's (possibly nested) text runs.
                paragraph_text = self._extract_text_from_children(node.get("children", [])).strip()
                if paragraph_text:
                    yield TextNode(text="\n\n" + paragraph_text)
            elif node_type == "ImageNode":
                yield ImageNode(url=node.get("url", ""), alt=node.get("alt"))
            elif node_type == "VideoCardNode":
                # Video cards are rendered as a textual placeholder.
                yield TextNode(text=f"\n [视频卡片: {node.get('aid', 0)}]")

    def _extract_text_from_children(self, children: list[dict[str, Any]]) -> str:
        """Concatenate text from a node list, recursing into styled wrappers."""
        pieces: list[str] = []
        for child in children:
            kind = child.get("type")
            if kind == "TextNode":
                pieces.append(child.get("text", ""))
            elif kind in ("BoldNode", "FontSizeNode", "ColorNode"):
                pieces.append(self._extract_text_from_children(child.get("children", [])))
        return "".join(pieces)

    @property
    def author_info(self) -> tuple[str, str]:
        """(name, avatar URL) of the author."""
        return self.meta.author.name, self.meta.author.face

    @property
    def title(self) -> str:
        """Article title."""
        return self.meta.title

    @property
    def timestamp(self) -> int:
        """Publish timestamp."""
        return self.meta.publish_time

    @property
    def summary(self) -> str:
        """Article summary."""
        return self.meta.summary

    @property
    def stats(self) -> Stats:
        """Article statistics."""
        return self.meta.stats

    @property
    def tags(self) -> list[str]:
        """Tag names."""
        return [tag.get("name", "") for tag in self.meta.tags]
diff --git a/core/parsers/bilibili/common.py b/core/parsers/bilibili/common.py
new file mode 100644
index 0000000..3ab328b
--- /dev/null
+++ b/core/parsers/bilibili/common.py
@@ -0,0 +1,10 @@
+from msgspec import Struct
+
+
class Upper(Struct):
    """Uploader identity shared by video and favlist payloads."""

    mid: int
    """用户 ID"""
    name: str
    """作者"""
    face: str
    """头像"""
diff --git a/core/parsers/bilibili/dynamic.py b/core/parsers/bilibili/dynamic.py
new file mode 100644
index 0000000..40e2eef
--- /dev/null
+++ b/core/parsers/bilibili/dynamic.py
@@ -0,0 +1,197 @@
+from typing import Any
+
+from msgspec import Struct, convert
+
+
class AuthorInfo(Struct):
    """Author block of a dynamic (module_author)."""

    name: str
    face: str  # avatar URL
    mid: int  # user id
    pub_time: str  # publish time string
    pub_ts: int  # publish timestamp
    # jump_url: str
    # following: bool = False
    # official_verify: dict[str, Any] | None = None
    # vip: dict[str, Any] | None = None
    # pendant: dict[str, Any] | None = None
+
+
class VideoArchive(Struct):
    """Video card embedded in a dynamic (MAJOR_TYPE_ARCHIVE)."""

    aid: str
    bvid: str
    title: str
    desc: str
    cover: str  # cover image URL
    # duration_text: str
    # jump_url: str
    # stat: dict[str, str]
    # badge: dict[str, Any] | None = None
+
+
class OpusImage(Struct):
    """One image of an opus embedded in a dynamic."""

    url: str
    # width: int
    # height: int
    # size: float
    # aigc: dict[str, Any] | None = None
    # live_url: str | None = None
+
+
class OpusSummary(Struct):
    """Summary text of an embedded opus."""

    text: str
    # rich_text_nodes: list[dict[str, Any]]
+
+
class OpusContent(Struct):
    """Opus content embedded in a dynamic (MAJOR_TYPE_OPUS)."""

    jump_url: str
    pics: list[OpusImage]
    summary: OpusSummary
    title: str | None = None
    # fold_action: list[str] | None = None
+
+
class DynamicMajor(Struct):
    """Main content of a dynamic: either a video archive or an opus."""

    type: str
    archive: VideoArchive | None = None
    opus: OpusContent | None = None

    @property
    def title(self) -> str | None:
        """Title (archives only)."""
        if self.archive and self.type == "MAJOR_TYPE_ARCHIVE":
            return self.archive.title
        return None

    @property
    def text(self) -> str | None:
        """Text body: archive description or opus summary."""
        if self.type == "MAJOR_TYPE_ARCHIVE" and self.archive:
            return self.archive.desc
        if self.type == "MAJOR_TYPE_OPUS" and self.opus:
            return self.opus.summary.text
        return None

    @property
    def image_urls(self) -> list[str]:
        """Image URLs: opus pictures, or the archive cover."""
        if self.type == "MAJOR_TYPE_OPUS" and self.opus:
            return [pic.url for pic in self.opus.pics]
        if self.type == "MAJOR_TYPE_ARCHIVE" and self.archive and self.archive.cover:
            return [self.archive.cover]
        return []

    @property
    def cover_url(self) -> str | None:
        """Cover URL (archives only)."""
        if self.archive and self.type == "MAJOR_TYPE_ARCHIVE":
            return self.archive.cover
        return None
+
+
class DynamicModule(Struct):
    """Module bundle of a dynamic: author plus optional content/stat dicts."""

    module_author: AuthorInfo
    module_dynamic: dict[str, Any] | None = None
    module_stat: dict[str, Any] | None = None

    @property
    def author_name(self) -> str:
        """Author display name."""
        return self.module_author.name

    @property
    def author_face(self) -> str:
        """Author avatar URL."""
        return self.module_author.face

    @property
    def pub_ts(self) -> int:
        """Publish timestamp."""
        return self.module_author.pub_ts

    @property
    def major_info(self) -> dict[str, Any] | None:
        """Raw "major" payload, when the dynamic carries one."""
        dynamic = self.module_dynamic
        return dynamic.get("major") if dynamic else None
+
+
class DynamicInfo(Struct):
    """A single dynamic entry."""

    id_str: str
    type: str
    visible: bool
    modules: DynamicModule
    basic: dict[str, Any] | None = None

    def _major(self) -> DynamicMajor | None:
        """Convert the raw major payload into a DynamicMajor, if present."""
        raw = self.modules.major_info
        return convert(raw, DynamicMajor) if raw else None

    @property
    def name(self) -> str:
        """Author display name."""
        return self.modules.author_name

    @property
    def avatar(self) -> str:
        """Author avatar URL."""
        return self.modules.author_face

    @property
    def timestamp(self) -> int:
        """Publish timestamp."""
        return self.modules.pub_ts

    @property
    def title(self) -> str | None:
        """Title of the embedded content, if any."""
        major = self._major()
        return major.title if major else None

    @property
    def text(self) -> str | None:
        """Text of the embedded content, if any."""
        major = self._major()
        return major.text if major else None

    @property
    def image_urls(self) -> list[str]:
        """Image URLs of the embedded content."""
        major = self._major()
        return major.image_urls if major else []

    @property
    def cover_url(self) -> str | None:
        """Cover URL of the embedded content, if any."""
        major = self._major()
        return major.cover_url if major else None
+
+
class DynamicItem(Struct):
    """Top-level wrapper returned by the dynamic API."""

    item: DynamicInfo
diff --git a/core/parsers/bilibili/favlist.py b/core/parsers/bilibili/favlist.py
new file mode 100644
index 0000000..823deca
--- /dev/null
+++ b/core/parsers/bilibili/favlist.py
@@ -0,0 +1,66 @@
+from msgspec import Struct
+
+from .common import Upper
+
+
class FavItem(Struct):
    """One entry of a favorites folder."""

    title: str
    cover: str
    intro: str
    link: str  # app-scheme link, e.g. bilibili://video/<avid>

    @property
    def url(self) -> str:
        """Web URL derived from the app-scheme link."""
        return self.link.replace("bilibili://video/", "https://bilibili.com/video/av")

    @property
    def desc(self) -> str:
        """Human-readable description block."""
        parts = [f"标题: {self.title}", f"简介: {self.intro}", f"链接: {self.url}"]
        return "\n".join(parts)

    @property
    def avid(self) -> int:
        """Numeric avid taken from the link's last path segment."""
        return int(self.link.rsplit("/", 1)[-1])
+
+
class FavInfo(Struct):
    """Metadata of a favorites folder."""

    # id: int
    # fid: int
    # mid: int
    title: str
    """标题"""
    cover: str
    """封面"""
    upper: Upper
    """up 主信息"""
    ctime: int
    """创建时间戳"""
    mtime: int
    """修改时间戳"""
    media_count: int
    """媒体数量"""
    intro: str
    """简介"""
+
+
class FavData(Struct):
    """Favorites folder payload: folder info plus (first page of) entries."""

    info: FavInfo
    medias: list[FavItem]

    @property
    def title(self) -> str:
        """Display title."""
        return f"收藏夹 - {self.info.title}"

    @property
    def cover(self) -> str:
        """Folder cover URL."""
        return self.info.cover

    @property
    def desc(self) -> str:
        """Folder description line."""
        return f"简介: {self.info.intro}"

    @property
    def timestamp(self) -> int:
        """Folder creation timestamp."""
        return self.info.ctime
diff --git a/core/parsers/bilibili/live.py b/core/parsers/bilibili/live.py
new file mode 100644
index 0000000..25bbd81
--- /dev/null
+++ b/core/parsers/bilibili/live.py
@@ -0,0 +1,72 @@
+from msgspec import Struct
+
+
class RoomInfo(Struct):
    """Live room metadata."""

    title: str
    """标题"""
    cover: str
    """封面"""
    keyframe: str
    """关键帧"""
    tags: str
    """标签"""
    area_name: str
    """分区名称"""
    parent_area_name: str
    """父分区名称"""
+
+
class BaseInfo(Struct):
    """Anchor (streamer) basic profile."""

    uname: str
    """用户名"""
    face: str
    """头像"""
    gender: str
    """性别"""
+
+
class LiveInfo(Struct):
    """Anchor live-level info."""

    level: int
    """等级"""
    level_color: int
    """等级颜色"""
    score: int
    """分数"""
+
+
class AnchorInfo(Struct):
    """Anchor info bundle."""

    base_info: BaseInfo
    """基础信息"""
    live_info: LiveInfo
    """直播信息"""
+
+
class RoomData(Struct):
    """Live room payload: room metadata plus anchor info."""

    room_info: RoomInfo
    """房间信息"""
    anchor_info: AnchorInfo
    """主播信息"""

    @property
    def title(self) -> str:
        """Display title, prefixed for live rooms."""
        return f"直播 - {self.room_info.title}"

    @property
    def cover(self) -> str:
        """Cover image URL."""
        return self.room_info.cover

    @property
    def detail(self) -> str:
        """Area and tag summary text."""
        return f"分区: {self.room_info.area_name} | {self.room_info.parent_area_name}\n标签: {self.room_info.tags}"

    @property
    def keyframe(self) -> str:
        """Latest keyframe snapshot URL."""
        return self.room_info.keyframe

    @property
    def name(self) -> str:
        """Anchor user name."""
        return self.anchor_info.base_info.uname

    @property
    def avatar(self) -> str:
        """Anchor avatar URL."""
        return self.anchor_info.base_info.face
diff --git a/core/parsers/bilibili/opus.py b/core/parsers/bilibili/opus.py
new file mode 100644
index 0000000..6ffc695
--- /dev/null
+++ b/core/parsers/bilibili/opus.py
@@ -0,0 +1,153 @@
+from collections.abc import Generator
+from typing import Any
+
+from msgspec import Struct
+
+
class TextNode(Struct, tag="TextNode"):
    """A text fragment of an opus."""

    text: str
    """文本内容"""
+
+
class ImageNode(Struct, tag="ImageNode"):
    """An image of an opus."""

    url: str
    """图片链接"""
    alt: str | None = None
    """图片描述"""
+
+
class Author(Struct):
    """Opus author block."""

    name: str
    face: str  # avatar URL
    mid: int  # user id
    pub_time: str  # publish time string
    pub_ts: int  # publish timestamp
+
+
class Image(Struct):
    """A picture inside an opus paragraph."""

    url: str
    # width: int
    # height: int
    # size: float
+
+
class Pic(Struct):
    """Picture-group payload of an opus paragraph."""

    pics: list[Image]
    style: int
+
+
class Text(Struct):
    """Text payload of an opus paragraph: raw rich-text nodes."""

    nodes: list[dict[str, Any]]
+
+
class Paragraph(Struct):
    """One opus paragraph: text and/or a picture group."""

    para_type: int
    text: Text | None = None
    pic: Pic | None = None
    # align: int = 0
    # format: dict[str, Any] | None = None
+
+
class Content(Struct):
    """Opus body: ordered paragraphs."""

    paragraphs: list[Paragraph]
+
+
class Stat(Struct):
    """Opus interaction statistics (raw dicts)."""

    like: dict[str, Any] | None = None
    comment: dict[str, Any] | None = None
    forward: dict[str, Any] | None = None
    favorite: dict[str, Any] | None = None
    coin: dict[str, Any] | None = None
+
+
class Module(Struct):
    """One opus module; module_type discriminates which payload is set."""

    module_type: str
    module_author: Author | None = None
    module_content: Content | None = None
    # module_stat: OpusStat | None = None
+
+
class Basic(Struct):
    """Basic opus info."""

    title: str
+
+
class Info(Struct):
    """Opus info: id, type and module list."""

    id_str: str
    type: int
    modules: list[Module]
    basic: Basic | None = None
+
+
class OpusItem(Struct):
    """Top-level opus payload."""

    item: Info

    @property
    def title(self) -> str | None:
        """Opus title, if the basic block is present."""
        return self.item.basic.title if self.item.basic else None

    @property
    def name_avatar(self) -> tuple[str, str]:
        """(name, avatar URL) from the first module carrying an author.

        NOTE(review): raises StopIteration when no module has an author.
        """
        author = next(m.module_author for m in self.item.modules if m.module_author)
        return author.name, author.face

    @property
    def timestamp(self) -> int | None:
        """Publish timestamp from the author module, if any."""
        for module in self.item.modules:
            if module.module_author and module.module_type == "MODULE_TYPE_AUTHOR":
                return module.module_author.pub_ts
        return None

    def gen_text_img(self) -> Generator[TextNode | ImageNode, None, None]:
        """Yield text and image nodes in document order."""
        for module in self.item.modules:
            if module.module_type != "MODULE_TYPE_CONTENT" or not module.module_content:
                continue
            for paragraph in module.module_content.paragraphs:
                # Text paragraph.
                if paragraph.text and paragraph.text.nodes:
                    extracted = self._extract_text_from_nodes(paragraph.text.nodes).strip()
                    if extracted:
                        yield TextNode(text="\n\n" + extracted)
                # Picture paragraph.
                if paragraph.pic and paragraph.pic.pics:
                    for pic in paragraph.pic.pics:
                        yield ImageNode(url=pic.url)

    def _extract_text_from_nodes(self, nodes: list[dict[str, Any]]) -> str:
        """Concatenate word text from rich-text nodes."""
        word_types = ("TEXT_NODE_TYPE_WORD", "TEXT_NODE_TYPE_RICH")
        pieces = [
            node["word"].get("words", "")
            for node in nodes
            if node.get("type") in word_types and node.get("word")
        ]
        return "".join(pieces)
diff --git a/core/parsers/bilibili/video.py b/core/parsers/bilibili/video.py
new file mode 100644
index 0000000..3740a72
--- /dev/null
+++ b/core/parsers/bilibili/video.py
@@ -0,0 +1,140 @@
+from dataclasses import dataclass
+
+from msgspec import Struct
+
+from .common import Upper
+
+
class Stats(Struct):
    """Video statistics counters."""

    view: int
    """播放量"""
    danmaku: int
    """弹幕数"""
    reply: int
    """回复数"""
    favorite: int
    """收藏数"""
    coin: int
    """硬币数"""
    share: int
    """分享数"""
    like: int
    """点赞数"""
+
+
class Page(Struct):
    """One part (分P) of a multi-part video."""

    part: str
    """分集标题"""
    ctime: int
    """创建时间戳"""
    duration: int
    """时长"""
    first_frame: str | None = None
    """封面图片"""
+
+
@dataclass(frozen=True, slots=True)
class PageInfo:
    """Resolved per-page info produced by VideoInfo.extract_info_with_page."""

    index: int  # 0-based page index
    title: str
    duration: int  # duration (seconds — matches VideoContent.duration)
    timestamp: int
    cover: str | None = None
+
+
class VideoInfo(Struct):
    """Video payload returned by the view API."""

    bvid: str
    """bvid"""
    title: str
    """标题"""
    desc: str
    """简介"""
    duration: int
    """时长"""
    owner: Upper
    """作者信息"""
    stat: Stats
    """统计信息"""
    pubdate: int
    """公开时间戳"""
    ctime: int
    """创建时间戳"""
    pic: str | None = None
    """封面图片"""
    pages: list[Page] | None = None
    """分集信息"""

    @property
    def title_with_part(self) -> str:
        """Title, suffixed with the first part name for multi-part videos."""
        if self.pages and len(self.pages) > 1:
            return f"{self.title} - {self.pages[0].part}"
        return self.title

    @property
    def formatted_stats_info(self) -> str:
        """One-line emoji summary of the video statistics."""
        stat = self.stat
        pairs = (
            ("👍", stat.like),
            ("🪙", stat.coin),
            ("⭐", stat.favorite),
            ("↩️", stat.share),
            ("💬", stat.reply),
            ("👀", stat.view),
            ("💭", stat.danmaku),
        )
        # Values above 10000 are rendered in units of 万 (ten thousand).
        return " ".join(
            f"{icon} {count / 10000:.1f}万" if count > 10000 else f"{icon} {count}"
            for icon, count in pairs
        )

    def extract_info_with_page(self, page_num: int = 1) -> PageInfo:
        """Resolve per-page info: index, title, duration, cover, timestamp.

        Args:
            page_num (int): 1-based page number. Defaults to 1.

        Returns:
            PageInfo: resolved info for the requested page.
        """
        index = page_num - 1
        title = self.title
        duration = self.duration
        cover = self.pic
        timestamp = self.pubdate

        if self.pages and len(self.pages) > 1:
            # Wrap out-of-range page numbers instead of failing.
            index %= len(self.pages)
            page = self.pages[index]
            title = f"{title} | 分集 - {page.part}"
            duration = page.duration
            cover = page.first_frame
            timestamp = page.ctime

        return PageInfo(
            index=index,
            title=title,
            duration=duration,
            timestamp=timestamp,
            cover=cover,
        )
+
+
class ModelResult(Struct):
    """AI model output for a video summary."""

    summary: str
+
+
class AIConclusion(Struct):
    """AI summary payload; model_result may be absent."""

    model_result: ModelResult | None = None

    @property
    def summary(self) -> str:
        """Formatted summary text, or a fallback notice."""
        if self.model_result and self.model_result.summary:
            return f"AI总结: {self.model_result.summary}"
        return "该视频暂不支持AI总结"
diff --git a/core/parsers/data.py b/core/parsers/data.py
new file mode 100644
index 0000000..e56f6f8
--- /dev/null
+++ b/core/parsers/data.py
@@ -0,0 +1,241 @@
+from asyncio import Task
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, TypedDict
+
+
+def repr_path_task(path_task: Path | Task[Path]) -> str:
+ if isinstance(path_task, Path):
+ return f"path={path_task.name}"
+ else:
+ return f"task={path_task.get_name()}, done={path_task.done()}"
+
+
+@dataclass(repr=False, slots=True)
+class MediaContent:
+ path_task: Path | Task[Path]
+
+ async def get_path(self) -> Path:
+ if isinstance(self.path_task, Path):
+ return self.path_task
+ self.path_task = await self.path_task
+ return self.path_task
+
+ def __repr__(self) -> str:
+ prefix = self.__class__.__name__
+ return f"{prefix}({repr_path_task(self.path_task)})"
+
+
@dataclass(repr=False, slots=True)
class AudioContent(MediaContent):
    """Audio media item."""

    # Playback length in seconds; 0.0 when unknown.
    duration: float = 0.0
+
+
@dataclass(repr=False, slots=True)
class VideoContent(MediaContent):
    """Video media item with an optional cover image."""

    # Cover image: finished path, pending download task, or absent.
    cover: Path | Task[Path] | None = None
    # Playback length in seconds; 0.0 when unknown.
    duration: float = 0.0

    async def get_cover_path(self) -> Path | None:
        """Return the cover's local path, awaiting (and caching) its task once."""
        if self.cover is not None and not isinstance(self.cover, Path):
            self.cover = await self.cover
        return self.cover

    @property
    def display_duration(self) -> str:
        """Duration rendered as `时长: M:SS`."""
        total = int(self.duration)
        return f"时长: {total // 60}:{total % 60:02d}"

    def __repr__(self) -> str:
        parts = [f"path={repr_path_task(self.path_task)}"]
        if self.cover is not None:
            parts.append(f"cover={repr_path_task(self.cover)}")
        return f"VideoContent({', '.join(parts)})"
+
+
@dataclass(repr=False, slots=True)
class ImageContent(MediaContent):
    """Static image media item (no fields beyond the file itself)."""

    pass
+
+
@dataclass(repr=False, slots=True)
class DynamicContent(MediaContent):
    """Animated item downloaded in video form; converted to a GIF afterwards."""

    # Path of the converted GIF, once the source video has been transcoded.
    gif_path: Path | None = None
+
+
@dataclass(repr=False, slots=True)
class GraphicsContent(MediaContent):
    """Text-plus-image item; when rendered, the text precedes the image."""

    # Text shown before the image, if any.
    text: str | None = None
    # Image caption, rendered centered, if any.
    alt: str | None = None

    def __repr__(self) -> str:
        parts = [f"path={repr_path_task(self.path_task)}"]
        if self.text:
            parts.append(f"text={self.text}")
        if self.alt:
            parts.append(f"alt={self.alt}")
        return f"GraphicsContent({', '.join(parts)})"
+
+
@dataclass(slots=True)
class Platform:
    """Identity of a supported media platform."""

    name: str
    """ Internal platform key. """
    display_name: str
    """ Human-readable platform name. """
+
+
+@dataclass(repr=False, slots=True)
+class Author:
+ """作者信息"""
+
+ name: str
+ """作者名称"""
+ avatar: Path | Task[Path] | None = None
+ """作者头像 URL 或本地路径"""
+ description: str | None = None
+ """作者个性签名等"""
+
+ async def get_avatar_path(self) -> Path | None:
+ if self.avatar is None:
+ return None
+ if isinstance(self.avatar, Path):
+ return self.avatar
+ self.avatar = await self.avatar
+ return self.avatar
+
+ def __repr__(self) -> str:
+ repr = f"Author(name={self.name}"
+ if self.avatar:
+ repr += f", avatar_{repr_path_task(self.avatar)}"
+ if self.description:
+ repr += f", description={self.description}"
+ return repr + ")"
+
+
@dataclass(repr=False, slots=True)
class ParseResult:
    """Complete result of parsing one shared link / post."""

    platform: Platform
    """Platform the content came from"""
    author: Author | None = None
    """Author information"""
    title: str | None = None
    """Title"""
    text: str | None = None
    """Body text"""
    timestamp: int | None = None
    """Publish timestamp, in seconds"""
    url: str | None = None
    """Source link"""
    contents: list[MediaContent] = field(default_factory=list)
    """Media contents"""
    extra: dict[str, Any] = field(default_factory=dict)
    """Extra information"""
    repost: "ParseResult | None" = None
    """Reposted content"""
    render_image: Path | None = None
    """Rendered card image"""

    @property
    def header(self) -> str:
        """Header line for the default renderer: platform [@author] [| title].

        Always returns a string (the annotation previously said `str | None`,
        but no branch can return None).
        """
        header = self.platform.display_name
        if self.author:
            header += f" @{self.author.name}"
        if self.title:
            header += f" | {self.title}"
        return header

    @property
    def display_url(self) -> str | None:
        """Source link, prefixed for display; None when there is no URL."""
        return f"链接: {self.url}" if self.url else None

    @property
    def repost_display_url(self) -> str | None:
        """Reposted post's link, prefixed for display; None when absent."""
        return f"原帖: {self.repost.url}" if self.repost and self.repost.url else None

    @property
    def extra_info(self) -> str | None:
        """Free-form info string stored under extra['info'], if any."""
        return self.extra.get("info")

    @property
    def video_contents(self) -> list[VideoContent]:
        return [cont for cont in self.contents if isinstance(cont, VideoContent)]

    @property
    def img_contents(self) -> list[ImageContent]:
        return [cont for cont in self.contents if isinstance(cont, ImageContent)]

    @property
    def audio_contents(self) -> list[AudioContent]:
        return [cont for cont in self.contents if isinstance(cont, AudioContent)]

    @property
    def dynamic_contents(self) -> list[DynamicContent]:
        return [cont for cont in self.contents if isinstance(cont, DynamicContent)]

    @property
    def graphics_contents(self) -> list[GraphicsContent]:
        return [cont for cont in self.contents if isinstance(cont, GraphicsContent)]

    @property
    async def cover_path(self) -> Path | None:
        """Cover path of the first video content, or None.

        NOTE: this is an async property — use as `await result.cover_path`.
        """
        for cont in self.contents:
            if isinstance(cont, VideoContent):
                return await cont.get_cover_path()
        return None

    @property
    def formatted_datetime(self) -> str | None:
        """Publish time as 'YYYY-MM-DD HH:MM:SS' (local time), or None.

        Fix: this was declared as a property yet also took an `fmt` parameter.
        A property getter is always called with only `self`, so `fmt` could
        never be supplied; the dead parameter has been removed.
        """
        if self.timestamp is None:
            return None
        return datetime.fromtimestamp(self.timestamp).strftime("%Y-%m-%d %H:%M:%S")

    def __repr__(self) -> str:
        return (
            f"platform: {self.platform.display_name}, "
            f"timestamp: {self.timestamp}, "
            f"title: {self.title}, "
            f"text: {self.text}, "
            f"url: {self.url}, "
            f"author: {self.author}, "
            f"contents: {self.contents}, "
            f"extra: {self.extra}, "
            f"repost: <<<<<<<{self.repost}>>>>>>, "
            f"render_image: {self.render_image.name if self.render_image else 'None'}"
        )
+
+
+
class ParseResultKwargs(TypedDict, total=False):
    """Optional keyword arguments accepted when building a ParseResult."""

    title: str | None
    text: str | None
    contents: list[MediaContent]
    timestamp: int | None
    url: str | None
    author: Author | None
    extra: dict[str, Any]
    repost: ParseResult | None
diff --git a/core/parsers/douyin/__init__.py b/core/parsers/douyin/__init__.py
new file mode 100644
index 0000000..2c05d90
--- /dev/null
+++ b/core/parsers/douyin/__init__.py
@@ -0,0 +1,148 @@
+import re
+from typing import ClassVar
+
+import msgspec
+from aiohttp import TCPConnector
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..base import (
+ BaseParser,
+ Downloader,
+ ParseException,
+ Platform,
+ PlatformEnum,
+ handle,
+)
+
+
class DouyinParser(BaseParser):
    """抖音 link parser: short links, video/note pages and image "slides".

    Fixes over the previous revision:
    - the named capture groups had lost their names (a regex syntax error);
      the handlers read `group("ty")` / `group("vid")`, so the groups are
      restored as `ty` and `vid`;
    - the `_ROUTER_DATA` pattern regained its closing `</script>` terminator —
      a lazy `(.*?)` with nothing after it matches the empty string;
    - `connector=` is a ClientSession argument, not a per-request one; the
      per-request `ssl=False` option is used instead.
    """

    # Platform identity
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.DOUYIN, display_name="抖音")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)

    # https://v.douyin.com/_2ljF4AmKL8
    @handle("v.douyin", r"v\.douyin\.com/[a-zA-Z0-9_\-]+")
    @handle("jx.douyin", r"jx\.douyin\.com/[a-zA-Z0-9_\-]+")
    async def _parse_short_link(self, searched: re.Match[str]):
        """Resolve a share short link, then re-dispatch on the redirect target."""
        url = f"https://{searched.group(0)}"
        return await self.parse_with_redirect(url)

    # https://www.douyin.com/video/7521023890996514083
    # https://www.douyin.com/note/7469411074119322899
    @handle("douyin", r"douyin\.com/(?P<ty>video|note)/(?P<vid>\d+)")
    @handle("iesdouyin", r"iesdouyin\.com/share/(?P<ty>slides|video|note)/(?P<vid>\d+)")
    @handle("m.douyin", r"m\.douyin\.com/share/(?P<ty>slides|video|note)/(?P<vid>\d+)")
    # https://jingxuan.douyin.com/m/video/7574300896016862490?app=yumme&utm_source=copy_link
    @handle(
        "jingxuan.douyin",
        r"jingxuan\.douyin\.com/m/(?P<ty>slides|video|note)/(?P<vid>\d+)",
    )
    async def _parse_douyin(self, searched: re.Match[str]):
        """Parse a canonical douyin URL; "slides" posts go through the slides API."""
        ty, vid = searched.group("ty"), searched.group("vid")
        if ty == "slides":
            return await self.parse_slides(vid)

        # Try both share hosts; fall through to the next on a parse failure.
        for url in (
            self._build_m_douyin_url(ty, vid),
            self._build_iesdouyin_url(ty, vid),
        ):
            try:
                return await self.parse_video(url)
            except ParseException as e:
                logger.warning(f"failed to parse {url}, error: {e}")
                continue
        raise ParseException("分享已删除或资源直链提取失败, 请稍后再试")

    @staticmethod
    def _build_iesdouyin_url(ty: str, vid: str) -> str:
        """Share-page URL on the iesdouyin host."""
        return f"https://www.iesdouyin.com/share/{ty}/{vid}"

    @staticmethod
    def _build_m_douyin_url(ty: str, vid: str) -> str:
        """Share-page URL on the m.douyin host."""
        return f"https://m.douyin.com/share/{ty}/{vid}"

    async def parse_video(self, url: str):
        """Fetch a share page and extract media from its embedded router data.

        Raises:
            ParseException: on a non-200 response or missing _ROUTER_DATA.
        """
        async with self.client.get(
            url,
            headers=self.ios_headers,
            allow_redirects=False,
            # Per-request TLS-verification opt-out; `connector=` here raised
            # TypeError because connectors belong to the ClientSession.
            ssl=False,
        ) as resp:
            if resp.status != 200:
                raise ParseException(f"status: {resp.status}")
            text = await resp.text()

        # The page embeds its state as: window._ROUTER_DATA = {...}</script>
        pattern = re.compile(
            pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
            flags=re.DOTALL,
        )
        matched = pattern.search(text)

        if not matched or not matched.group(1):
            raise ParseException("can't find _ROUTER_DATA in html")

        from .video import RouterData

        video_data = msgspec.json.decode(matched.group(1).strip(), type=RouterData).video_data
        contents = []

        # Image post
        if image_urls := video_data.image_urls:
            contents.extend(self.create_image_contents(image_urls))

        # Plain video post
        elif video_url := video_data.video_url:
            cover_url = video_data.cover_url
            duration = video_data.video.duration if video_data.video else 0
            contents.append(self.create_video_content(video_url, cover_url, duration))

        author = self.create_author(video_data.author.nickname, video_data.avatar_url)

        return self.result(
            title=video_data.desc,
            author=author,
            contents=contents,
            timestamp=video_data.create_time,
        )

    async def parse_slides(self, video_id: str):
        """Fetch a "slides" (image/live-photo) post via the slidesinfo API."""
        url = "https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/"
        params = {
            "aweme_ids": f"[{video_id}]",
            "request_source": "200",
        }
        async with self.client.get(
            url,
            params=params,
            headers=self.android_headers,
            # Same fix as parse_video: request-level ssl option, no connector.
            ssl=False,
        ) as resp:
            resp.raise_for_status()

            from .slides import SlidesInfo

            slides_data = msgspec.json.decode(await resp.read(), type=SlidesInfo).aweme_details[0]
        contents = []

        # Static images
        if image_urls := slides_data.image_urls:
            contents.extend(self.create_image_contents(image_urls))

        # Live-photo ("dynamic") items
        if dynamic_urls := slides_data.dynamic_urls:
            contents.extend(self.create_dynamic_contents(dynamic_urls))

        author = self.create_author(slides_data.name, slides_data.avatar_url)

        return self.result(
            title=slides_data.desc,
            author=author,
            contents=contents,
            timestamp=slides_data.create_time,
        )
diff --git a/core/parsers/douyin/slides.py b/core/parsers/douyin/slides.py
new file mode 100644
index 0000000..b69fdf6
--- /dev/null
+++ b/core/parsers/douyin/slides.py
@@ -0,0 +1,59 @@
+from random import choice
+
+from msgspec import Struct, field
+
+
class PlayAddr(Struct):
    """Play-address candidates (alternate URLs for one stream)."""

    url_list: list[str]
+
+
class Cover(Struct):
    """Cover-image URL candidates."""

    url_list: list[str]
+
+
class Video(Struct):
    """Video stream: play addresses, cover and duration."""

    play_addr: PlayAddr
    cover: Cover
    duration: int
+
+
class Image(Struct):
    """One slide: a static image, optionally with a live-photo video."""

    video: Video | None = None
    url_list: list[str] = field(default_factory=list)
+
+
class Avatar(Struct):
    """Avatar-image URL candidates."""

    url_list: list[str]
+
+
class Author(Struct):
    """Post author: nickname plus avatar thumbnail."""

    nickname: str
    # avatar_larger: Avatar
    avatar_thumb: Avatar
+
+
class SlidesData(Struct):
    """One "slides" post: author, caption and its image / live-photo items."""

    author: Author
    desc: str
    create_time: int
    images: list[Image]

    @property
    def name(self) -> str:
        """Author nickname."""
        return self.author.nickname

    @property
    def avatar_url(self) -> str:
        """A randomly chosen avatar URL."""
        return choice(self.author.avatar_thumb.url_list)

    @property
    def image_urls(self) -> list[str]:
        """One randomly chosen URL per image."""
        return [choice(img.url_list) for img in self.images]

    @property
    def dynamic_urls(self) -> list[str]:
        """One randomly chosen play URL per live-photo item."""
        return [choice(img.video.play_addr.url_list) for img in self.images if img.video]
+
+
class SlidesInfo(Struct):
    """Top-level slidesinfo API response."""

    aweme_details: list[SlidesData] = field(default_factory=list)
diff --git a/core/parsers/douyin/video.py b/core/parsers/douyin/video.py
new file mode 100644
index 0000000..8cbe0a1
--- /dev/null
+++ b/core/parsers/douyin/video.py
@@ -0,0 +1,95 @@
+from random import choice
+from typing import Any
+
+from msgspec import Struct, field
+
+from ..base import ParseException
+
+
class Avatar(Struct):
    """Avatar-image URL candidates."""

    url_list: list[str]
+
+
class Author(Struct):
    """Post author; either avatar size may be absent."""

    nickname: str
    avatar_thumb: Avatar | None = None
    avatar_medium: Avatar | None = None
+
+
class PlayAddr(Struct):
    """Play-address candidates (alternate URLs for one stream)."""

    url_list: list[str]
+
+
class Cover(Struct):
    """Cover-image URL candidates."""

    url_list: list[str]
+
+
class Video(Struct):
    """Video stream: play addresses, cover and duration."""

    play_addr: PlayAddr
    cover: Cover
    duration: int
+
+
class Image(Struct):
    """One image item, optionally carrying a live-photo video."""

    video: Video | None = None
    url_list: list[str] = field(default_factory=list)
+
+
class VideoData(Struct):
    """One video/note item from douyin's router data."""

    create_time: int
    author: Author
    desc: str
    images: list[Image] | None = None
    video: Video | None = None

    @property
    def image_urls(self) -> list[str]:
        """One randomly chosen URL per image; empty when there are no images."""
        if not self.images:
            return []
        return [choice(img.url_list) for img in self.images]

    @property
    def video_url(self) -> str | None:
        """Random play URL with the watermarked 'playwm' path swapped for 'play'."""
        if self.video is None:
            return None
        return choice(self.video.play_addr.url_list).replace("playwm", "play")

    @property
    def cover_url(self) -> str | None:
        """A random cover URL, when this item carries a video."""
        return choice(self.video.cover.url_list) if self.video else None

    @property
    def avatar_url(self) -> str | None:
        """Author avatar URL, preferring the thumbnail size."""
        for avatar in (self.author.avatar_thumb, self.author.avatar_medium):
            if avatar:
                return choice(avatar.url_list)
        return None
+
+
class VideoInfoRes(Struct):
    """`videoInfoRes` payload: list of matched items."""

    item_list: list[VideoData] = field(default_factory=list)

    @property
    def video_data(self) -> VideoData:
        # Raises instead of returning None so callers can surface a clear error.
        if len(self.item_list) == 0:
            raise ParseException("can't find data in videoInfoRes")
        return choice(self.item_list)
+
+
class VideoOrNotePage(Struct):
    """Route-page wrapper around the videoInfoRes payload."""

    video_info_res: VideoInfoRes = field(name="videoInfoRes", default_factory=VideoInfoRes)
+
+
class LoaderData(Struct):
    """Router `loaderData`; keys are literal route ids like "video_(id)/page"."""

    video_page: VideoOrNotePage | None = field(name="video_(id)/page", default=None)
    note_page: VideoOrNotePage | None = field(name="note_(id)/page", default=None)
+
+
class RouterData(Struct):
    """Top-level window._ROUTER_DATA payload."""

    loader_data: LoaderData = field(name="loaderData", default_factory=LoaderData)
    errors: dict[str, Any] | None = None

    @property
    def video_data(self) -> VideoData:
        """Item data from whichever route page (video or note) is present."""
        for page in (self.loader_data.video_page, self.loader_data.note_page):
            if page is not None:
                return page.video_info_res.video_data
        raise ParseException("can't find video_(id)/page or note_(id)/page in router data")
diff --git a/core/parsers/example.py b/core/parsers/example.py
new file mode 100644
index 0000000..c869f8b
--- /dev/null
+++ b/core/parsers/example.py
@@ -0,0 +1,134 @@
+from re import Match
+from typing import ClassVar
+
+from aiohttp import ClientError
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, handle
+from .data import Platform
+
+"""
+这是一个示例解析器,请感兴趣的开发者自行实现解析器,并提交PR。
+
+"""
+
class ExampleParser(BaseParser):
    """示例视频网站解析器 — template parser for plugin contributors.

    Fixes over the previous revision: the short-link pattern contained a stray
    closing parenthesis, and the page patterns used anonymous "(?P...)" groups
    — both are regex syntax errors that raised at import time. The handlers
    read `searched.group("video_id")`, so the groups are named `video_id`.
    """

    platform: ClassVar[Platform] = Platform(name="example", display_name="示例网站")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)

    @handle("ex.short", r"ex\.short/\w+")
    async def _parse_short_link(self, searched: Match[str]):
        """Parse a short link: follow the redirect, then re-dispatch.

        Make sure a handle exists for the redirect target; e.g. a redirect to
        example.com/... will be parsed by `_parse`.
        """
        url = f"https://{searched.group(0)}"
        return await self.parse_with_redirect(url)

    @handle("example.com", r"example\.com/video/(?P<video_id>\w+)")
    @handle("exam.ple", r"exam\.ple/(?P<video_id>\w+)")
    async def _parse(self, searched: Match[str]):
        # 1. Extract the video ID
        video_id = searched.group("video_id")
        url = f"https://api.example.com/video/{video_id}"
        # 2. Query the API for video information
        async with self.client.get(url, headers=self.headers) as resp:
            if resp.status >= 400:
                raise ClientError(f"HTTP {resp.status} {resp.reason}")
            data = await resp.json()

        # 3. Pull out the fields we need
        title = data["title"]
        author_name = data["author"]["name"]
        avatar_url = data["author"]["avatar"]
        video_url = data["video_url"]
        cover_url = data["cover_url"]
        duration = data["duration"]
        timestamp = data["publish_time"]
        description = data.get("description", "")

        # 4. Video content
        author = self.create_author(author_name, avatar_url)
        video = self.create_video_content(video_url, cover_url, duration)

        # 5. Image-set content
        image_urls = data.get("images")
        images = self.create_image_contents(image_urls)

        # 6. Return the parse result
        return self.result(
            title=title,
            text=description,
            author=author,
            contents=[video, *images],
            timestamp=timestamp,
            url=f"https://example.com/video/{video_id}",
        )
+
+
+"""
+
+# 构建作者信息
+
+author = self.create_author(
+ name="作者名",
+ avatar_url="https://example.com/avatar.jpg", # 可选,会自动下载
+ description="个性签名" # 可选
+)
+
+
+# 构建视频内容
+
+## 方式1:传入 URL,自动下载
+video = self.create_video_content(
+ url_or_task="https://example.com/video.mp4",
+ cover_url="https://example.com/cover.jpg", # 可选
+ duration=120.5 # 可选,单位:秒
+)
+
+## 方式2:传入已创建的下载任务
+video_task = self.download.download_video(url, ext_headers=self.headers)
+video = self.create_video_content(
+ url_or_task=video_task,
+ cover_url=cover_url,
+ duration=duration
+)
+
+
+# 并发下载图集内容
+images = self.create_image_contents([
+ "https://example.com/img1.jpg",
+ "https://example.com/img2.jpg",
+])
+
+
+# 构建图文内容(适用于类似 Bilibili 动态图文混排)
+
+graphics = self.create_graphics_content(
+ image_url="https://example.com/image.jpg",
+ text="图片前的文字说明", # 可选
+ alt="图片描述" # 可选,居中显示
+)
+
+
+# 创建动图GIF内容,平台一般只提供视频, 后续插件会做自动转为 gif 的处理
+
+dynamics = self.create_dynamic_contents([
+ "https://example.com/dynamic1.mp4",
+ "https://example.com/dynamic2.mp4",
+])
+
+
+# 重定向 url
+
+real_url = await self.get_redirect_url(
+ url="https://short.url/abc",
+ headers=self.headers # 可选
+)
+
+"""
diff --git a/core/parsers/kuaishou.py b/core/parsers/kuaishou.py
new file mode 100644
index 0000000..f6cf99e
--- /dev/null
+++ b/core/parsers/kuaishou.py
@@ -0,0 +1,142 @@
+import re
+from random import choice
+from typing import ClassVar, TypeAlias
+
+import msgspec
+from msgspec import Struct, field
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, ParseException, PlatformEnum, handle
+from .data import Platform
+
+
class KuaiShouParser(BaseParser):
    """快手解析器 (Kuaishou link parser).

    Fix over the previous revision: the INIT_STATE pattern had lost its
    closing `</script>` terminator, so the lazy `(.*?)` group matched the
    empty string and JSON decoding could never succeed.
    """

    # Platform identity
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.KUAISHOU, display_name="快手")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)
        self.ios_headers["Referer"] = "https://v.kuaishou.com/"

    # https://v.kuaishou.com/2yAnzeZ
    @handle("v.kuaishou", r"v\.kuaishou\.com/[A-Za-z\d._?%&+\-=/#]+")
    # https://www.kuaishou.com/short-video/3xhjgcmir24m4nm
    @handle("kuaishou", r"(?:www\.)?kuaishou\.com/[A-Za-z\d._?%&+\-=/#]+")
    # https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc
    @handle("chenzhongtech", r"(?:v\.m\.)?chenzhongtech\.com/fw/[A-Za-z\d._?%&+\-=/#]+")
    async def _parse_v_kuaishou(self, searched: re.Match[str]):
        """Resolve the share link, fetch the page and parse window.INIT_STATE."""
        url = f"https://{searched.group(0)}"
        real_url = await self.get_redirect_url(url, headers=self.ios_headers)

        if not real_url:
            raise ParseException("failed to get location url from url")

        # /fw/long-video/ pages return a different payload; normalize to /fw/photo/
        real_url = real_url.replace("/fw/long-video/", "/fw/photo/")

        async with self.client.get(real_url, headers=self.ios_headers) as resp:
            if resp.status >= 400:
                raise ParseException(f"获取页面失败 {resp.status}")
            response_text = await resp.text()

        # The page embeds its state as: window.INIT_STATE = {...}</script>
        pattern = r"window\.INIT_STATE\s*=\s*(.*?)</script>"
        matched = re.search(pattern, response_text, re.DOTALL)

        if not matched:
            raise ParseException("failed to parse video JSON info from HTML")

        json_str = matched.group(1).strip()
        init_state = msgspec.json.decode(json_str, type=KuaishouInitState)
        # Pick the first INIT_STATE entry that actually carries a photo payload.
        photo = next((d.photo for d in init_state.values() if d.photo is not None), None)
        if photo is None:
            raise ParseException("window.init_state don't contains videos or pics")

        contents = []

        # Video post
        if video_url := photo.video_url:
            contents.append(self.create_video_content(video_url, photo.cover_url, photo.duration))

        # Image-set post
        if img_urls := photo.img_urls:
            contents.extend(self.create_image_contents(img_urls))

        author = self.create_author(photo.name, photo.head_url)

        # photo.timestamp is in milliseconds — convert to seconds.
        return self.result(
            title=photo.caption,
            author=author,
            contents=contents,
            timestamp=photo.timestamp // 1000,
        )
+
+
+
+
+
class CdnUrl(Struct):
    """A CDN host plus (optionally) a full URL on it."""

    cdn: str
    url: str | None = None
+
+
class Atlas(Struct):
    """Image-atlas section of a Kuaishou photo payload."""

    music_cdn_list: list[CdnUrl] = field(name="musicCdnList", default_factory=list)
    cdn_list: list[CdnUrl] = field(name="cdnList", default_factory=list)
    size: list[dict] = field(name="size", default_factory=list)
    img_route_list: list[str] = field(name="list", default_factory=list)

    @property
    def img_urls(self):
        """Full image URLs: one random CDN host combined with every route."""
        if not (self.cdn_list and self.img_route_list):
            return []
        host = choice(self.cdn_list).cdn
        return [f"https://{host}/{route}" for route in self.img_route_list]
+
+
class ExtParams(Struct):
    """Extra parameters of a photo payload; carries the image atlas."""

    atlas: Atlas = field(default_factory=Atlas)
+
+
class Photo(Struct):
    """One Kuaishou post ("photo"): caption, author and media URL candidates."""

    # Post title / caption.
    caption: str
    # Publish time (milliseconds; divided by 1000 where consumed).
    timestamp: int
    duration: int = 0
    user_name: str = field(default="未知用户", name="userName")
    head_url: str | None = field(default=None, name="headUrl")
    cover_urls: list[CdnUrl] = field(name="coverUrls", default_factory=list)
    main_mv_urls: list[CdnUrl] = field(name="mainMvUrls", default_factory=list)
    ext_params: ExtParams = field(name="ext_params", default_factory=ExtParams)

    @property
    def name(self) -> str:
        """User name with Hangul-filler (U+3164) padding removed."""
        return self.user_name.replace("\u3164", "").strip()

    @property
    def cover_url(self):
        """A random cover URL, or None when none are listed."""
        return choice(self.cover_urls).url if self.cover_urls else None

    @property
    def video_url(self):
        """A random main-video URL, or None when none are listed."""
        return choice(self.main_mv_urls).url if self.main_mv_urls else None

    @property
    def img_urls(self):
        """Atlas image URLs (empty for pure-video posts)."""
        return self.ext_params.atlas.img_urls
+
+
class TusjohData(Struct):
    """One window.INIT_STATE entry.

    NOTE(review): "tusjoh" appears to be "string" with each letter shifted by
    one — an upstream key obfuscation; verify against live payloads.
    """

    result: int
    photo: Photo | None = None


# INIT_STATE maps arbitrary keys to entries; at most some carry a photo.
KuaishouInitState: TypeAlias = dict[str, TusjohData]
diff --git a/core/parsers/nga.py b/core/parsers/nga.py
new file mode 100644
index 0000000..a10fe6d
--- /dev/null
+++ b/core/parsers/nga.py
@@ -0,0 +1,190 @@
+import asyncio
+import json
+import random
+import re
+import time
+from typing import ClassVar
+
+from aiohttp import ClientError
+from bs4 import BeautifulSoup, Tag
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from ..exception import ParseException
+from .base import BaseParser, Platform, PlatformEnum, handle
+
+
class NGAParser(BaseParser):
    """NGA forum (nga.178.com / bbs.nga.cn / ngabbs.com) thread parser.

    Fix over the previous revision: all three handle patterns read
    "tid=(?P\\d+)" — the capture group had lost its name, which is a regex
    syntax error. The handler reads `searched.group("tid")`, so the group is
    restored as `tid`.
    """

    # Platform identity
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.NGA, display_name="NGA")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)
        # Browser-like headers; presumably needed past NGA's bot checks — TODO confirm.
        extra_headers = {
            "Referer": "https://nga.178.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }
        self.headers.update(extra_headers)
        # Host serving attachment images referenced by [img] BBCode in posts.
        self.base_img_url = "https://img.nga.178.com/attachments"

    @staticmethod
    def nga_url(tid: str | int) -> str:
        """Canonical thread URL for a thread id."""
        return f"https://nga.178.com/read.php?tid={tid}"

    # ("ngabbs.com", r"https?://ngabbs\.com/read\.php\?tid=(?P<tid>\d+)(?:[A-Za-z\d=_-]+)?"),
    # ("nga.178.com", r"https?://nga\.178\.com/read\.php\?tid=(?P<tid>\d+)(?:[A-Za-z\d=_-]+)?"),
    # ("bbs.nga.cn", r"https?://bbs\.nga\.cn/read\.php\?tid=(?P<tid>\d+)(?:[A-Za-z\d=_-]+)?"),
    @handle("ngabbs.com", r"tid=(?P<tid>\d+)")
    @handle("nga.178.com", r"tid=(?P<tid>\d+)")
    @handle("bbs.nga.cn", r"tid=(?P<tid>\d+)")
    async def _parse(self, searched: re.Match[str]):
        """Fetch a thread page (negotiating the guestJs anti-bot cookie when
        needed) and extract title, author, publish time, text and images."""
        tid = searched.group("tid")
        url = self.nga_url(tid)
        html = None
        async with self.client.get(url, headers=self.headers, allow_redirects=True) as resp:
            try:
                # The first request may return 403 but carry JavaScript that
                # sets the guestJs cookie.
                html = await resp.text()

                # On 403 with a guestJs cookie setter, extract it and retry.
                if resp.status == 403 and "guestJs" in html:
                    # Pull the guestJs value out of the inline JavaScript.
                    cookie_match = re.search(
                        r"document\.cookie\s*=\s*['\"]guestJs=([^;'\"]+)",
                        html,
                    )
                    if cookie_match:
                        guest_js = cookie_match.group(1)
                        # Mimic the page's setTimeout before retrying.
                        await asyncio.sleep(0.3)

                        # Add a random cache-busting parameter, as the page's
                        # own JavaScript does.
                        rand_param = random.randint(0, 999)
                        separator = "&" if "?" in url else "?"
                        retry_url = f"{url}{separator}rand={rand_param}"
                        clean_headers = self.headers.copy()
                        clean_headers["Cookie"] = f"guestJs={guest_js}"
                        async with self.client.get(
                            retry_url,
                            headers=clean_headers,
                            allow_redirects=True,
                        ) as retry_resp:

                            html = await retry_resp.text()
                            # Continue the later checks with the retry response.
                            resp = retry_resp
                            if resp.status != 200:
                                raise ParseException(
                                    f"仍无法获取页面, HTTP {resp.status}"
                                )

            except ClientError as e:
                raise ParseException(f"请求失败: {e}")

            if resp.status != 200:
                raise ParseException(f"无法获取页面, HTTP {resp.status}")

        # Crude login-wall / block detection.
        if "需要" in html and ("登录" in html or "请登录" in html):
            raise ParseException("页面可能需要登录后访问")

        # Parse the HTML with BeautifulSoup.
        soup = BeautifulSoup(html, "html.parser")

        # Title — from the #postsubject0 element.
        title = None
        title_tag = soup.find(id="postsubject0")
        if title_tag and isinstance(title_tag, Tag):
            title = title_tag.get_text(strip=True)

        # Author — take the uid from #postauthor0, then look the username up
        # in the inline commonui.userInfo.setAll() JavaScript blob.
        author = None
        author_tag = soup.find(id="postauthor0")
        if author_tag and isinstance(author_tag, Tag):
            # href looks like: nuke.php?func=ucp&uid=24278093
            href = author_tag.get("href", "")
            uid_match = re.search(r"[?&]uid=(\d+)", str(href))
            if uid_match:
                uid = uid_match.group(1)
                script_pattern = r"commonui\.userInfo\.setAll\s*\(\s*(\{.*?\})\s*\)"
                script_match = re.search(script_pattern, html, re.DOTALL)
                if script_match:
                    try:
                        user_info_json = script_match.group(1)
                        user_info = json.loads(user_info_json)
                        # Resolve the username via the extracted uid.
                        if uid in user_info:
                            author = user_info[uid].get("username")
                    except (json.JSONDecodeError, KeyError):
                        # Malformed JSON or unexpected shape: leave author None.
                        pass
        author = self.create_author(author) if author else None
        # Publish time — from the first post's #postdate0.
        timestamp = None
        time_tag = soup.find(id="postdate0")
        if time_tag and isinstance(time_tag, Tag):
            timestr = time_tag.get_text(strip=True)
            timestamp = int(time.mktime(time.strptime(timestr, "%Y-%m-%d %H:%M")))

        # Body text — from #postcontent0.
        text = None
        content_tag = soup.find(id="postcontent0")
        contents = []
        if content_tag and isinstance(content_tag, Tag):
            text = content_tag.get_text("\n", strip=True)
            # Collect [img] BBCode attachments, then strip BBCode from the text.
            img_urls: list[str] = re.findall(r"\[img\](.*?)\[/img\]", text)
            # NOTE(review): url[1:] assumes routes start with "." — verify.
            img_urls = [self.base_img_url + url[1:] for url in img_urls]
            contents.extend(self.create_image_contents(img_urls))
            text = self.clean_nga_text(text)

        return self.result(
            title=title,
            text=text,
            url=url,
            author=author,
            contents=contents,
            timestamp=timestamp,
        )

    @staticmethod
    def clean_nga_text(text: str, max_length: int = 500) -> str:
        """Strip BBCode markup, compress whitespace and cap the length."""
        rules: list[tuple[str, str, int]] = [
            # Remove image tags (complete and truncated).
            (r"\[img\][^\[\]]*\[/img\]", "", 0),
            (r"\[img\][^\[\]]*", "", 0),
            # URL tags: keep the link text.
            (r"\[url=[^\]]*\]([^\[]*?)\[/url\]", r"\1", 0),
            (r"\[url\]([^\[]*?)\[/url\]", r"\1", 0),
            # Drop quote blocks entirely.
            (r"\[quote\].*?\[/quote\]", "", re.DOTALL),
            # Formatting tags (b, i, u): keep the inner text.
            (r"\[(b|i|u)\](.*?)\[/\1\]", r"\2", re.DOTALL),
            # Attribute-bearing formatting tags (color, size): keep inner text.
            (r"\[(color|size)=[^\]]*\](.*?)\[/\1\]", r"\2", re.DOTALL),
            # Remove any remaining unmatched tags.
            (r"\[[^]]+\]", "", 0),
            # Whitespace cleanup.
            (r"\n{3,}", "\n\n", 0),  # collapse 3+ newlines to two
            (r"[ \t]+", " ", 0),  # collapse runs of spaces/tabs
            (r"\n\s+\n", "\n\n", 0),  # strip whitespace-only lines
        ]

        for rule in rules:
            pattern, replacement, flags = rule[0], rule[1], rule[2]
            text = re.sub(pattern, replacement, text, flags=flags)

        text = text.strip()

        # Cap the output length.
        if len(text) > max_length:
            text = text[:max_length] + "..."

        return text
diff --git a/core/parsers/tiktok.py b/core/parsers/tiktok.py
new file mode 100644
index 0000000..3e01257
--- /dev/null
+++ b/core/parsers/tiktok.py
@@ -0,0 +1,38 @@
+import re
+from typing import ClassVar
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, PlatformEnum, handle
+from .data import Author, Platform, VideoContent
+
+
class TikTokParser(BaseParser):
    """TikTok link parser backed by the yt-dlp downloader."""

    # Platform identity
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.TIKTOK, display_name="TikTok")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)

    @handle("tiktok.com", r"(?:https?://)?(www|vt|vm)\.tiktok\.com/[A-Za-z0-9._?%&+\-=/#@]*")
    async def _parse(self, searched: re.Match[str]):
        """Parse a tiktok.com URL (vt/vm short links are resolved first)."""
        # Original URL and its subdomain from the match.
        url, prefix = searched.group(0), searched.group(1)

        # vt./vm. are share short links — follow the redirect to the real page.
        if prefix in ("vt", "vm"):
            url = await self.get_redirect_url(url)

        # Probe metadata (title, channel, duration, ...) via yt-dlp.
        video_info = await self.downloader.ytdlp_extract_info(url)

        # Kick off cover and video downloads.
        cover = self.downloader.download_img(video_info.thumbnail)
        video = self.downloader.download_video(url, use_ytdlp=True)

        return self.result(
            title=video_info.title,
            author=Author(name=video_info.channel),
            contents=[VideoContent(video, cover, duration=video_info.duration)],
            timestamp=video_info.timestamp,
        )
diff --git a/core/parsers/twitter.py b/core/parsers/twitter.py
new file mode 100644
index 0000000..a66775a
--- /dev/null
+++ b/core/parsers/twitter.py
@@ -0,0 +1,133 @@
+import re
+from itertools import chain
+from typing import Any, ClassVar
+
+from aiohttp import ClientError
+from bs4 import BeautifulSoup, Tag
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from ..exception import ParseException
+from .base import BaseParser, PlatformEnum, handle
+from .data import ParseResult, Platform
+
+
class TwitterParser(BaseParser):
    """Twitter/X status parser backed by the third-party xdown.app API.

    Fix over the previous revision: the dot in the "x.com" pattern is now
    escaped, so hosts like "xacom" no longer match.
    """

    # Platform identity
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.TWITTER, display_name="推特")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)

    async def _req_xdown_api(self, url: str) -> dict[str, Any]:
        """POST the status URL to xdown.app and return its JSON payload.

        Raises:
            ClientError: on any HTTP error status from the API.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://xdown.app",
            "Referer": "https://xdown.app/",
            **self.headers,
        }
        data = {"q": url, "lang": "zh-cn"}

        async with self.client.post(
            "https://xdown.app/api/ajaxSearch",
            data=data,
            headers=headers,
        ) as resp:
            if resp.status >= 400:
                raise ClientError(f"xdown API {resp.status} {resp.reason}")
            return await resp.json()

    @handle("x.com", r"https?://x\.com/[0-9-a-zA-Z_]{1,20}/status/([0-9]+)")
    async def _parse(self, searched: re.Match[str]) -> ParseResult:
        """Parse an x.com status URL into a ParseResult."""
        url = searched.group(0)
        resp = await self._req_xdown_api(url)
        if resp.get("status") != "ok":
            raise ParseException("解析失败")

        html_content = resp.get("data")

        if html_content is None:
            raise ParseException("解析失败, 数据为空")

        return self.parse_twitter_html(html_content)

    def parse_twitter_html(self, html_content: str) -> ParseResult:
        """Extract media links from the HTML snippet returned by xdown.app.

        Args:
            html_content (str): HTML fragment listing download buttons.

        Returns:
            ParseResult: video, images and/or gifs found in the fragment.
        """
        soup = BeautifulSoup(html_content, "html.parser")

        title = None
        cover_url = None
        video_url = None
        images_urls = []
        dynamic_urls = []

        # 1. Thumbnail — used as the video cover.
        thumb_tag = soup.find("img")
        if isinstance(thumb_tag, Tag):
            if cover := thumb_tag.get("src"):
                cover_url = str(cover)

        # 2. Download buttons — MP4 / image / gif links, identified by label.
        tw_button_tags = soup.find_all("a", class_="tw-button-dl")
        abutton_tags = soup.find_all("a", class_="abutton")
        for tag in chain(tw_button_tags, abutton_tags):
            if not isinstance(tag, Tag):
                continue
            href = tag.get("href")
            if href is None:
                continue

            href = str(href)
            text = tag.get_text(strip=True)
            if "下载 MP4" in text:
                video_url = href
                break
            elif "下载图片" in text:
                images_urls.append(href)
            elif "下载 gif" in text:
                dynamic_urls.append(href)

        # 3. Title text.
        title_tag = soup.find("h3")
        if title_tag:
            title = title_tag.get_text(strip=True)

        contents = []

        if video_url:
            contents.append(self.create_video_content(video_url, cover_url))

        if images_urls:
            contents.extend(self.create_image_contents(images_urls))

        if dynamic_urls:
            contents.extend(self.create_dynamic_contents(dynamic_urls))

        # xdown's payload does not expose the author, so use a placeholder.
        return self.result(
            title=title,
            author=self.create_author("无用户名"),
            contents=contents,
        )
diff --git a/core/parsers/weibo.py b/core/parsers/weibo.py
new file mode 100644
index 0000000..79847ed
--- /dev/null
+++ b/core/parsers/weibo.py
@@ -0,0 +1,420 @@
+from re import Match, sub
+from time import time
+from typing import ClassVar
+from uuid import uuid4
+
+import msgspec
+from aiohttp import ClientError
+from bs4 import BeautifulSoup, Tag
+from msgspec import Struct
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, ParseException, Platform, PlatformEnum, handle
+from .data import MediaContent
+
+
class WeiBoParser(BaseParser):
    """Parser for Weibo statuses, videos and long-form articles."""

    # Platform metadata
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.WEIBO, display_name="微博")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)
        # Browser-like headers; Weibo rejects obviously non-browser requests.
        extra_headers = {
            "accept": (
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,"
                "image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
            ),
            "referer": "https://weibo.com/",
        }
        self.headers.update(extra_headers)

    # https://weibo.com/tv/show/1034:5007449447661594?mid=5007452630158934
    # NOTE: the named groups below are required — the handlers read them
    # via searched.group("name").
    @handle("weibo.com/tv", r"weibo\.com/tv/show/\d{4}:\d+\?mid=(?P<mid>\d+)")
    async def _parse_weibo_tv(self, searched: Match[str]):
        """weibo.com/tv links carry a mid that maps to a base62 status id."""
        mid = str(searched.group("mid"))
        weibo_id = self._mid2id(mid)
        return await self.parse_weibo_id(weibo_id)

    # https://video.weibo.com/show?fid=1034:5145615399845897
    @handle("video.weibo", r"video\.weibo\.com/show\?fid=(?P<fid>\d+:\d+)")
    async def _parse_video_weibo(self, searched: Match[str]):
        fid = str(searched.group("fid"))
        return await self.parse_fid(fid)

    # https://m.weibo.cn/status/5234367615996775
    # https://m.weibo.cn/detail/4976424138313924
    @handle("m.weibo.cn", r"m\.weibo\.cn/(?:status|detail)/(?P<wid>\d+)")
    # https://weibo.com/7207262816/P5kWdcfDe
    @handle("weibo.com", r"weibo\.com/\d+/(?P<wid>[0-9a-zA-Z]+)")
    async def _parse_m_weibo_cn(self, searched: Match[str]):
        wid = str(searched.group("wid"))
        return await self.parse_weibo_id(wid)

    # https://mapp.api.weibo.cn/fx/233911ddcc6bffea835a55e725fb0ebc.html
    @handle("mapp.api.weibo", r"mapp\.api\.weibo\.cn/fx/[A-Za-z\d]+\.html")
    async def _parse_mapp_api_weibo(self, searched: Match[str]):
        # Share pages redirect to the canonical status URL.
        url = f"https://{searched.group(0)}"
        return await self.parse_with_redirect(url)

    # https://weibo.com/ttarticle/p/show?id=2309404962180771742222
    # https://weibo.com/ttarticle/x/m/show#/id=2309404962180771742222
    @handle("weibo.com/ttarticle", r"id=(?P<id>\d+)")
    # https://card.weibo.com/article/m/show/id/2309404962180771742222
    @handle("weibo.com/article", r"/id/(?P<id>\d+)")
    async def _parse_article(self, searched: Match[str]):
        _id = searched.group("id")
        return await self.parse_article(_id)

    async def parse_article(self, _id: str):
        """Fetch and parse a Weibo long-form article by id."""

        class UserInfo(Struct):
            screen_name: str
            profile_image_url: str

        class Data(Struct):
            url: str
            title: str
            content: str
            userinfo: UserInfo
            create_at_unix: int

        class Detail(Struct):
            code: str
            msg: str
            data: Data

        url = "https://card.weibo.com/article/m/aj/detail"
        params = {
            "_rid": str(uuid4()),  # random request id
            "id": _id,
            "_t": int(time() * 1000),  # cache-busting timestamp (ms)
        }

        async with self.client.post(
            url=url,
            data=params,
            headers=self.headers,
        ) as resp:
            if resp.status >= 400:
                raise ClientError(f"article API {resp.status} {resp.reason}")
            detail = msgspec.json.decode(await resp.read(), type=Detail)

        if detail.msg != "success":
            raise ParseException("请求失败")

        data = detail.data

        # The article body is HTML: walk paragraphs and images in order,
        # attaching the accumulated text to the next image.
        soup = BeautifulSoup(data.content, "html.parser")
        contents: list[MediaContent] = []
        text_buffer: list[str] = []

        for element in soup.find_all(["p", "img"]):
            if not isinstance(element, Tag):
                continue

            if element.name == "p":
                text = element.get_text(strip=True)
                # Strip zero-width spaces
                text = text.replace("\u200b", "")
                if text:
                    text_buffer.append(text)
            elif element.name == "img":
                src = element.get("src")
                if isinstance(src, str):
                    text = "\n\n".join(text_buffer)
                    contents.append(self.create_graphics_content(src, text=text))
                    text_buffer.clear()

        author = self.create_author(
            data.userinfo.screen_name,
            data.userinfo.profile_image_url,
        )

        # Text remaining after the last image becomes the trailing text.
        end_text = "\n\n".join(text_buffer) if text_buffer else None

        return self.result(
            url=data.url,
            title=data.title,
            author=author,
            timestamp=data.create_at_unix,
            text=end_text,
            contents=contents,
        )

    async def parse_fid(self, fid: str):
        """Parse a Weibo video identified by fid via the h5 component API."""

        req_url = f"https://h5.video.weibo.com/api/component?page=/show/{fid}"
        headers = {
            "Referer": f"https://h5.video.weibo.com/show/{fid}",
            "Content-Type": "application/x-www-form-urlencoded",
            **self.headers,
        }
        post_content = 'data={"Component_Play_Playinfo":{"oid":"' + fid + '"}}'

        async with self.client.post(
            req_url,
            data=post_content,
            headers=headers,
        ) as resp:
            if resp.status >= 400:
                raise ClientError(f"video API {resp.status} {resp.reason}")
            json_data = await resp.json()

        data = json_data.get("data", {}).get("Component_Play_Playinfo", {})
        if not data:
            raise ParseException("Component_Play_Playinfo 数据为空")
        # Author info
        user = data.get("reward", {}).get("user", {})
        author_name, avatar, description = (
            user.get("name", "未知"),
            user.get("profile_image_url"),
            user.get("description"),
        )
        author = self.create_author(author_name, avatar, description)

        # Title and plain text (HTML tags stripped)
        title, text = data.get("title", ""), data.get("text", "")
        if text:
            text = sub(r"<[^>]*>", "", text)
            text = text.replace("\n\n", "").strip()

        # Cover image URL is protocol-relative
        cover_url = data.get("cover_image")
        if cover_url:
            cover_url = "https:" + cover_url

        # Video download URL
        contents = []
        video_url_dict = data.get("urls")
        if video_url_dict and isinstance(video_url_dict, dict):
            # stream_url has the lowest bitrate; the first entry of `urls`
            # has the highest.
            first_mp4_url: str = next(iter(video_url_dict.values()))
            video_url = "https:" + first_mp4_url
        else:
            video_url = data.get("stream_url")

        if video_url:
            contents.append(self.create_video_content(video_url, cover_url))

        # Publish timestamp
        timestamp = data.get("real_date")

        return self.result(
            title=title,
            text=text,
            author=author,
            contents=contents,
            timestamp=timestamp,
        )

    async def parse_weibo_id(self, weibo_id: str):
        """Parse a status id (no cookies + XHR disguise + no redirects)."""
        headers = {
            "accept": "application/json, text/plain, */*",
            "referer": f"https://m.weibo.cn/detail/{weibo_id}",
            "origin": "https://m.weibo.cn",
            "x-requested-with": "XMLHttpRequest",
            "mweibo-pwa": "1",
            "sec-fetch-site": "same-origin",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            **self.headers,
        }

        # Timestamp parameter lowers the chance of hitting caches / WAF rules.
        ts = int(time() * 1000)
        url = f"https://m.weibo.cn/statuses/show?id={weibo_id}&_={ts}"

        # Key point: send no cookies and do not follow redirects (a second
        # cookie-carrying hop tends to trigger risk control).
        async with self.client.get(
            url=url,
            headers=headers,
            allow_redirects=False,
        ) as resp:
            if resp.status != 200:
                if resp.status in (403, 418):
                    raise ParseException(f"被风控拦截({resp.status}),可尝试更换 UA/Referer 或稍后重试")
                raise ParseException(f"获取数据失败 {resp.status} {resp.reason}")

            ctype = resp.headers.get("content-type", "")
            if "application/json" not in ctype:
                raise ParseException(f"获取数据失败 content-type is not application/json (got: {ctype})")

            # Decode from raw bytes to avoid text-encoding ambiguity.
            weibo_data = msgspec.json.decode(await resp.read(), type=WeiboResponse).data

        return self.build_weibo_data(weibo_data)

    def build_weibo_data(self, data: "WeiboData"):
        """Convert a WeiboData payload (and its repost, recursively)."""
        contents = []

        # Video content
        if video_url := data.video_url:
            cover_url = data.cover_url
            contents.append(self.create_video_content(video_url, cover_url))

        # Image contents
        if image_urls := data.image_urls:
            contents.extend(self.create_image_contents(image_urls))

        # Author
        author = self.create_author(data.display_name, data.user.profile_image_url)
        repost = None
        if data.retweeted_status:
            repost = self.build_weibo_data(data.retweeted_status)

        return self.result(
            title=data.title,
            text=data.text_content,
            author=author,
            contents=contents,
            timestamp=data.timestamp,
            url=data.url,
            repost=repost,
        )

    def _base62_encode(self, number: int) -> str:
        """Encode a non-negative integer in base62."""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        if number == 0:
            return "0"

        result = ""
        while number > 0:
            result = alphabet[number % 62] + result
            number //= 62

        return result

    def _mid2id(self, mid: str) -> str:
        """Convert a numeric Weibo mid to its base62 id (7-digit chunks)."""
        from math import ceil

        mid = str(mid)[::-1]  # reverse the input
        size = ceil(len(mid) / 7)  # number of 7-digit chunks
        result = []

        for i in range(size):
            # Take one chunk and restore its original digit order
            s = mid[i * 7 : (i + 1) * 7][::-1]
            # Base62-encode the chunk
            s = self._base62_encode(int(s))
            # Left-pad non-final chunks to 4 characters
            if i < size - 1 and len(s) < 4:
                s = "0" * (4 - len(s)) + s
            result.append(s)

        result.reverse()
        return "".join(result)
+
+
+
+
+
class LargeInPic(Struct):
    """Large-size variant of a status picture."""

    url: str
+
class Pic(Struct):
    """A picture attached to a status; `large` holds the full-size URL."""

    url: str
    large: LargeInPic
+
+
class Urls(Struct):
    """Candidate MP4 URLs for a Weibo video, best quality first."""

    mp4_720p_mp4: str | None = None
    mp4_hd_mp4: str | None = None
    mp4_ld_mp4: str | None = None

    def get_video_url(self) -> str | None:
        """Return the first available URL in quality order, else None."""
        for candidate in (self.mp4_720p_mp4, self.mp4_hd_mp4, self.mp4_ld_mp4):
            if candidate:
                return candidate
        return None
+
+
class PagePic(Struct):
    """Cover picture of a `page_info` card."""

    url: str
+
+
class PageInfo(Struct):
    """`page_info` card of a status: optional title, video URLs and cover."""

    title: str | None = None
    urls: Urls | None = None
    page_pic: PagePic | None = None
+
+
class User(Struct):
    """Author of a Weibo status."""

    id: int
    screen_name: str
    """用户昵称"""
    profile_image_url: str
    """头像"""
+
+
class WeiboData(Struct):
    """A single Weibo status as returned by m.weibo.cn/statuses/show."""

    user: User
    text: str
    # source: str  # e.g. 微博网页版
    # region_name: str | None = None

    bid: str
    created_at: str
    """发布时间 格式: `Thu Oct 02 14:39:33 +0800 2025`"""

    status_title: str | None = None
    pics: list[Pic] | None = None
    page_info: PageInfo | None = None
    retweeted_status: "WeiboData | None" = None  # reposted status, if any

    @property
    def title(self) -> str | None:
        return self.page_info.title if self.page_info else None

    @property
    def display_name(self) -> str:
        return self.user.screen_name

    @property
    def text_content(self) -> str:
        """Plain text of the status with line breaks preserved."""
        # Convert <br /> to \n first, otherwise the tag-stripping regex
        # below would silently remove every line break.
        text = self.text.replace("<br />", "\n")
        # Strip remaining HTML tags
        text = sub(r"<[^>]*>", "", text)
        return text

    @property
    def cover_url(self) -> str | None:
        if self.page_info is None:
            return None
        if self.page_info.page_pic:
            return self.page_info.page_pic.url
        return None

    @property
    def video_url(self) -> str | None:
        if self.page_info and self.page_info.urls:
            return self.page_info.urls.get_video_url()
        return None

    @property
    def image_urls(self) -> list[str]:
        if self.pics:
            return [x.large.url for x in self.pics]
        return []

    @property
    def url(self) -> str:
        return f"https://weibo.com/{self.user.id}/{self.bid}"

    @property
    def timestamp(self) -> int:
        """Publish time as a Unix timestamp, honouring the %z offset.

        `time.mktime(strptime(...))` would interpret the struct_time in the
        machine's local zone and drop the `+0800` offset; datetime keeps it.
        """
        from datetime import datetime

        created = datetime.strptime(self.created_at, "%a %b %d %H:%M:%S %z %Y")
        return int(created.timestamp())
+
+
class WeiboResponse(Struct):
    """Envelope of the m.weibo.cn statuses/show response."""

    ok: int
    data: WeiboData
diff --git a/core/parsers/xiaohongshu.py b/core/parsers/xiaohongshu.py
new file mode 100644
index 0000000..3aa8551
--- /dev/null
+++ b/core/parsers/xiaohongshu.py
@@ -0,0 +1,247 @@
+import json
+import re
+from typing import Any, ClassVar
+
+from msgspec import Struct, convert, field
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, ParseException, Platform, PlatformEnum, handle
+
+
class XiaoHongShuParser(BaseParser):
    """Parser for Xiaohongshu notes: short links, explore and discovery pages."""

    # Platform metadata
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.XIAOHONGSHU, display_name="小红书")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)
        # Desktop-browser accept header for /explore pages.
        explore_headers = {
            "accept": (
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,"
                "image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
            )
        }
        self.headers.update(explore_headers)
        # XHR-style headers for /discovery/item requests (mobile flow).
        discovery_headers = {
            "origin": "https://www.xiaohongshu.com",
            "x-requested-with": "XMLHttpRequest",
            "sec-fetch-site": "same-origin",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
        }
        self.ios_headers.update(discovery_headers)

    @handle("xhslink.com", r"xhslink\.com/[A-Za-z0-9._?%&+=/#@-]*")
    async def _parse_short_link(self, searched: re.Match[str]):
        """Short links redirect to the canonical note URL."""
        url = f"https://{searched.group(0)}"
        return await self.parse_with_redirect(url, self.ios_headers)

    # https://www.xiaohongshu.com/explore/68feefe40000000007030c4a?xsec_token=...&xsec_source=pc_feed
    # NOTE: the named group is required — handlers read searched.group("xhs_id").
    @handle(
        "hongshu.com/explore",
        r"explore/(?P<xhs_id>[0-9a-zA-Z]+)\?[A-Za-z0-9._%&+=/#@-]*",
    )
    async def _parse_explore(self, searched: re.Match[str]):
        url = f"https://www.xiaohongshu.com/{searched.group(0)}"
        xhs_id = searched.group("xhs_id")
        return await self.parse_explore(url, xhs_id)

    # https://www.xiaohongshu.com/discovery/item/68e8e3fa00000000030342ec?app_platform=android&...
    @handle(
        "hongshu.com/discovery/item/",
        r"discovery/item/(?P<xhs_id>[0-9a-zA-Z]+)\?[A-Za-z0-9._%&+=/#@-]*",
    )
    async def _parse_discovery(self, searched: re.Match[str]):
        route = searched.group(0)
        # Prefer the explore page, fall back to the discovery page.
        explore_route = route.replace("discovery/item", "explore", 1)
        xhs_id = searched.group("xhs_id")

        try:
            return await self.parse_explore(f"https://www.xiaohongshu.com/{explore_route}", xhs_id)
        except ParseException:
            logger.debug("parse_explore failed, fallback to parse_discovery")
            return await self.parse_discovery(f"https://www.xiaohongshu.com/{route}")

    async def parse_explore(self, url: str, xhs_id: str):
        """Parse a desktop /explore note page."""
        async with self.client.get(url, headers=self.headers) as resp:
            html = await resp.text()
            logger.debug(f"url: {resp.url} | status: {resp.status}")

        json_obj = self._extract_initial_state_json(html)

        # ["note"]["noteDetailMap"][xhs_id]["note"]
        note_data = json_obj.get("note", {}).get("noteDetailMap", {}).get(xhs_id, {}).get("note", {})
        if not note_data:
            raise ParseException("can't find note detail in json_obj")

        class Image(Struct):
            urlDefault: str

        class User(Struct):
            nickname: str
            avatar: str

        class NoteDetail(Struct):
            type: str
            title: str
            desc: str
            user: User
            imageList: list[Image] = field(default_factory=list)
            video: Video | None = None

            @property
            def nickname(self) -> str:
                return self.user.nickname

            @property
            def avatar_url(self) -> str:
                return self.user.avatar

            @property
            def image_urls(self) -> list[str]:
                return [item.urlDefault for item in self.imageList]

            @property
            def video_url(self) -> str | None:
                if self.type != "video" or not self.video:
                    return None
                return self.video.video_url

        note_detail = convert(note_data, type=NoteDetail)

        contents = []
        # Video note: the first image doubles as the cover.
        if video_url := note_detail.video_url:
            cover_url = note_detail.image_urls[0] if note_detail.image_urls else None
            contents.append(self.create_video_content(video_url, cover_url))

        # Image note
        elif image_urls := note_detail.image_urls:
            contents.extend(self.create_image_contents(image_urls))

        # Author
        author = self.create_author(note_detail.nickname, note_detail.avatar_url)

        return self.result(
            title=note_detail.title,
            text=note_detail.desc,
            author=author,
            contents=contents,
        )

    async def parse_discovery(self, url: str):
        """Parse a mobile /discovery/item note page."""
        async with self.client.get(
            url,
            headers=self.ios_headers,
            allow_redirects=True,
        ) as resp:
            html = await resp.text()

        json_obj = self._extract_initial_state_json(html)
        note_data = json_obj.get("noteData")
        if not note_data:
            raise ParseException("can't find noteData in json_obj")
        preload_data = note_data.get("normalNotePreloadData", {})
        note_data = note_data.get("data", {}).get("noteData", {})
        if not note_data:
            raise ParseException("can't find noteData in noteData.data")

        class Image(Struct):
            url: str
            urlSizeLarge: str | None = None

        class User(Struct):
            nickName: str
            avatar: str

        class NoteData(Struct):
            type: str
            title: str
            desc: str
            user: User
            time: int
            lastUpdateTime: int
            # default_factory keeps this consistent with NoteDetail above
            imageList: list[Image] = field(default_factory=list)  # watermarked
            video: Video | None = None

            @property
            def image_urls(self) -> list[str]:
                return [item.url for item in self.imageList]

            @property
            def video_url(self) -> str | None:
                if self.type != "video" or not self.video:
                    return None
                return self.video.video_url

        class NormalNotePreloadData(Struct):
            title: str
            desc: str
            # Watermark-free, but only one image; used for the video cover.
            imagesList: list[Image] = field(default_factory=list)

            @property
            def image_urls(self) -> list[str]:
                return [item.urlSizeLarge or item.url for item in self.imagesList]

        note_data = convert(note_data, type=NoteData)

        contents = []
        if video_url := note_data.video_url:
            if preload_data:
                preload_data = convert(preload_data, type=NormalNotePreloadData)
                img_urls = preload_data.image_urls
            else:
                img_urls = note_data.image_urls
            contents.append(self.create_video_content(video_url, img_urls[0]))
        elif img_urls := note_data.image_urls:
            contents.extend(self.create_image_contents(img_urls))

        return self.result(
            title=note_data.title,
            author=self.create_author(note_data.user.nickName, note_data.user.avatar),
            contents=contents,
            text=note_data.desc,
            timestamp=note_data.time // 1000,  # ms -> s
        )

    def _extract_initial_state_json(self, html: str) -> dict[str, Any]:
        """Extract the `window.__INITIAL_STATE__` JSON object from page HTML.

        The lazy group must be anchored by the closing </script> tag —
        without a terminator `(.*?)` always matches the empty string and
        json.loads would fail on every page.
        """
        pattern = r"window\.__INITIAL_STATE__=(.*?)</script>"
        matched = re.search(pattern, html, re.S)
        if not matched:
            raise ParseException("小红书分享链接失效或内容已删除")

        # `undefined` is valid JS but not valid JSON
        json_str = matched.group(1).replace("undefined", "null")
        return json.loads(json_str)
+
+
class Stream(Struct):
    """Available codec variants for a note video; each entry is a track list."""

    h264: list[dict[str, Any]] | None = None
    h265: list[dict[str, Any]] | None = None
    av1: list[dict[str, Any]] | None = None
    h266: list[dict[str, Any]] | None = None
+
+
class Media(Struct):
    """Media wrapper of a note video; holds the per-codec stream map."""

    stream: Stream
+
+
class Video(Struct):
    """Video node of a Xiaohongshu note."""

    media: Media

    @property
    def video_url(self) -> str | None:
        """Pick a playable URL: h265 (watermark-free) first, then h264, av1, h266."""
        stream = self.media.stream

        # h264 有水印,h265 无水印
        for track_list in (stream.h265, stream.h264, stream.av1, stream.h266):
            if track_list:
                return track_list[0]["masterUrl"]
        return None
diff --git a/core/parsers/youtube.py b/core/parsers/youtube.py
new file mode 100644
index 0000000..f2fc438
--- /dev/null
+++ b/core/parsers/youtube.py
@@ -0,0 +1,166 @@
+import re
+from pathlib import Path
+from typing import ClassVar
+
+import msgspec
+from aiohttp import ClientError
+from msgspec import Struct
+
+from astrbot.core.config.astrbot_config import AstrBotConfig
+
+from ..download import Downloader
+from .base import BaseParser, Platform, PlatformEnum, handle
+
+
class YouTubeParser(BaseParser):
    """Parser for YouTube videos and audio via yt-dlp."""

    # Platform metadata
    platform: ClassVar[Platform] = Platform(name=PlatformEnum.YOUTUBE, display_name="油管")

    def __init__(self, config: AstrBotConfig, downloader: Downloader):
        super().__init__(config, downloader)
        # An empty config value must mean "no cookies file": Path("") is
        # truthy, so the previous `Path(...) or None` could never yield None.
        cookies_file = self.config["ytb_cookies_file"]
        self.ytb_cookies_file = Path(cookies_file) if cookies_file else None
        self.max_duration = config["max_duration"]

    @handle("youtu.be", r"https?://(?:www\.)?youtu\.be/[A-Za-z\d\._\?%&\+\-=/#]+")
    @handle(
        "youtube.com",
        r"https?://(?:www\.)?youtube\.com/(?:watch|shorts)(?:/[A-Za-z\d_\-]+|\?v=[A-Za-z\d_\-]+)",
    )
    async def _parse_video(self, searched: re.Match[str]):
        return await self.parse_video(searched)

    async def parse_video(self, searched: re.Match[str]):
        """Parse a YouTube URL into a video (or thumbnail-only) result.

        Videos longer than ``max_duration`` are not downloaded; only the
        thumbnail is attached in that case.
        """
        # The full matched URL
        url = searched.group(0)

        video_info = await self.downloader.ytdlp_extract_info(
            url, cookiefile=self.ytb_cookies_file
        )
        author = await self._fetch_author_info(video_info.channel_id)

        contents = []
        if video_info.duration <= self.max_duration:
            video = self.downloader.download_video(
                url, use_ytdlp=True, cookiefile=self.ytb_cookies_file
            )
            contents.append(
                self.create_video_content(
                    video,
                    video_info.thumbnail,
                    video_info.duration,
                )
            )
        else:
            contents.extend(self.create_image_contents([video_info.thumbnail]))

        return self.result(
            title=video_info.title,
            author=author,
            contents=contents,
            timestamp=video_info.timestamp,
        )

    async def parse_audio(self, url: str):
        """Parse a YouTube URL and mark it for audio download.

        Args:
            url: YouTube link.

        Returns:
            ParseResult: thumbnail plus (when short enough) the audio content.
        """
        # Pass cookiefile by keyword, consistent with parse_video above.
        video_info = await self.downloader.ytdlp_extract_info(
            url, cookiefile=self.ytb_cookies_file
        )
        author = await self._fetch_author_info(video_info.channel_id)

        contents = []
        contents.extend(self.create_image_contents([video_info.thumbnail]))

        if video_info.duration <= self.max_duration:
            audio_task = self.downloader.download_audio(
                url, use_ytdlp=True, cookiefile=self.ytb_cookies_file
            )
            contents.append(self.create_audio_content(audio_task, duration=video_info.duration))

        return self.result(
            title=video_info.title,
            author=author,
            contents=contents,
            timestamp=video_info.timestamp,
        )

    async def _fetch_author_info(self, channel_id: str):
        """Fetch channel name/avatar/description via the innertube browse API."""
        url = "https://www.youtube.com/youtubei/v1/browse?prettyPrint=false"
        # Minimal innertube WEB-client context accepted by the endpoint.
        payload = {
            "context": {
                "client": {
                    "hl": "zh-HK",
                    "gl": "US",
                    "deviceMake": "Apple",
                    "deviceModel": "",
                    "clientName": "WEB",
                    "clientVersion": "2.20251002.00.00",
                    "osName": "Macintosh",
                    "osVersion": "10_15_7",
                },
                "user": {"lockedSafetyMode": False},
                "request": {
                    "useSsl": True,
                    "internalExperimentFlags": [],
                    "consistencyTokenJars": [],
                },
            },
            "browseId": channel_id,
        }
        async with self.client.post(
            url,
            json=payload,
            headers=self.headers,
        ) as resp:
            if resp.status >= 400:
                raise ClientError(f"YouTube browse API {resp.status} {resp.reason}")
            browse = msgspec.json.decode(await resp.read(), type=BrowseResponse)

        return self.create_author(browse.name, browse.avatar_url, browse.description)
+
+
+
+
+
class Thumbnail(Struct):
    """A single thumbnail image URL."""

    url: str
+
+
class AvatarInfo(Struct):
    """Avatar thumbnails of a channel."""

    thumbnails: list[Thumbnail]
+
+
class ChannelMetadataRenderer(Struct):
    """Channel metadata node of the innertube browse response."""

    title: str
    description: str
    avatar: AvatarInfo
+
+
class Metadata(Struct):
    """`metadata` node of the browse response."""

    channelMetadataRenderer: ChannelMetadataRenderer
+
+
class Avatar(Struct):
    # NOTE(review): duplicates AvatarInfo and appears unused in this view —
    # confirm against the rest of the file before removing.
    thumbnails: list[Thumbnail]
+
+
class BrowseResponse(Struct):
    """Subset of the innertube browse response used for author info."""

    metadata: Metadata

    @property
    def name(self) -> str:
        renderer = self.metadata.channelMetadataRenderer
        return renderer.title

    @property
    def avatar_url(self) -> str | None:
        thumbs = self.metadata.channelMetadataRenderer.avatar.thumbnails
        if not thumbs:
            return None
        return thumbs[0].url

    @property
    def description(self) -> str:
        renderer = self.metadata.channelMetadataRenderer
        return renderer.description
diff --git a/core/render.py b/core/render.py
new file mode 100644
index 0000000..8f06cc1
--- /dev/null
+++ b/core/render.py
@@ -0,0 +1,1455 @@
+import uuid
+from collections.abc import AsyncGenerator, Awaitable, Callable
+from dataclasses import dataclass
+from functools import lru_cache, wraps
+from io import BytesIO
+from itertools import chain
+from pathlib import Path
+from typing import ClassVar, ParamSpec, TypeVar
+
+import aiofiles
+from apilmoji import Apilmoji, EmojiCDNSource
+from apilmoji.core import get_font_height
+from PIL import Image, ImageDraw, ImageFont
+
+from astrbot.api import logger
+from astrbot.core.config.astrbot_config import AstrBotConfig
+from astrbot.core.message.components import BaseMessageComponent, Plain, Record, Video
+from astrbot.core.message.components import Image as AstrImage
+
+from .exception import DownloadException, DownloadLimitException, ZeroSizeException
+from .parsers import (
+ AudioContent,
+ DynamicContent,
+ GraphicsContent,
+ ImageContent,
+ ParseResult,
+ VideoContent,
+)
+from .utils import construct_forward_message
+
+# 定义类型变量
+P = ParamSpec("P")
+T = TypeVar("T")
+
+Color = tuple[int, int, int]
+PILImage = Image.Image
+
+
def suppress_exception(
    func: Callable[P, T],
) -> Callable[P, T | None]:
    """Decorator: swallow any exception raised by *func* and return None."""

    @wraps(func)
    def safe_call(*args: P.args, **kwargs: P.kwargs) -> T | None:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.debug(f"函数 {func.__name__} 执行失败: {e}")
        return None

    return safe_call
+
+
def suppress_exception_async(
    func: Callable[P, Awaitable[T]],
) -> Callable[P, Awaitable[T | None]]:
    """Decorator: swallow any exception from awaiting *func*; return None."""

    @wraps(func)
    async def safe_call(*args: P.args, **kwargs: P.kwargs) -> T | None:
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            logger.debug(f"函数 {func.__name__} 执行失败: {e}")
        return None

    return safe_call
+
+
@dataclass(eq=False, frozen=True, slots=True)
class FontInfo:
    """A font plus precomputed metrics used for text measurement."""

    font: ImageFont.FreeTypeFont
    line_height: int
    cjk_width: int

    def __hash__(self) -> int:
        """Hash by font identity so instances can serve as @lru_cache keys."""
        return hash((id(self.font), self.line_height, self.cjk_width))

    @lru_cache(maxsize=400)
    def get_char_width(self, char: str) -> int:
        """Measured advance width of *char*, memoized per (instance, char).

        NOTE(review): lru_cache on an instance method keeps FontInfo objects
        alive for the cache's lifetime (ruff B019); fonts are few and
        long-lived here, so this looks intentional — confirm.
        """
        # bbox = self.font.getbbox(char)
        # width = int(bbox[2] - bbox[0])
        # return width
        return int(self.font.getlength(char))

    def get_char_width_fast(self, char: str) -> int:
        """Width of *char*: fixed square width for CJK, measured otherwise."""
        if "\u4e00" <= char <= "\u9fff":
            return self.cjk_width
        else:
            return self.get_char_width(char)

    def get_text_width(self, text: str) -> int:
        """Pixel width of *text* using per-character cached widths.

        Args:
            text: text to measure.

        Returns:
            Width in pixels (0 for empty text).
        """
        if not text:
            return 0

        total_width = 0
        for char in text:
            total_width += self.get_char_width_fast(char)
        return total_width
+
+
@dataclass(eq=False, frozen=True, slots=True)
class FontSet:
    """Set of fonts used by the card renderer, one per text role."""

    _FONT_SIZES = (
        ("name", 28),
        ("title", 30),
        ("text", 24),
        ("extra", 24),
        ("indicator", 60),
    )
    """字体大小"""

    name_font: FontInfo
    title_font: FontInfo
    text_font: FontInfo
    extra_font: FontInfo
    indicator_font: FontInfo

    @classmethod
    def new(cls, font_path: Path) -> "FontSet":
        """Load the TTF at *font_path* at every configured role size."""
        font_infos: dict[str, FontInfo] = {}
        for name, size in cls._FONT_SIZES:
            font = ImageFont.truetype(font_path, size)
            font_infos[f"{name}_font"] = FontInfo(
                font=font,
                line_height=get_font_height(font),
                # CJK glyphs are treated as square: width == font size
                cjk_width=size,
            )
        return FontSet(**font_infos)
+
+
@dataclass(eq=False, frozen=True, slots=True)
class SectionData:
    """Base class for measured card sections."""

    height: int  # section height in pixels
+
+
@dataclass(eq=False, frozen=True, slots=True)
class HeaderSectionData(SectionData):
    """Measured data for the header (avatar + name + time) section."""

    avatar: PILImage | None
    name_lines: list[str]
    time_lines: list[str]
    text_height: int  # combined height of the name/time text block
+
+
@dataclass(eq=False, frozen=True, slots=True)
class TitleSectionData(SectionData):
    """Measured data for the title section."""

    lines: list[str]  # wrapped title lines
+
+
@dataclass(eq=False, frozen=True, slots=True)
class CoverSectionData(SectionData):
    """Measured data for the cover-image section."""

    cover_img: PILImage
+
+
@dataclass(eq=False, frozen=True, slots=True)
class TextSectionData(SectionData):
    """Measured data for the body-text section."""

    lines: list[str]  # wrapped text lines
+
+
@dataclass(eq=False, frozen=True, slots=True)
class ExtraSectionData(SectionData):
    """Measured data for the extra-info section."""

    lines: list[str]
+
+
@dataclass(eq=False, frozen=True, slots=True)
class RepostSectionData(SectionData):
    """Measured data for the nested repost card section."""

    scaled_image: PILImage  # pre-rendered, scaled repost card
+
+
@dataclass(eq=False, frozen=True, slots=True)
class ImageGridSectionData(SectionData):
    """Measured data for the image-grid section."""

    images: list[PILImage]
    cols: int
    rows: int
    has_more: bool  # True when more images exist than are displayed
    remaining_count: int  # number of hidden images
+
+
@dataclass(eq=False, frozen=True, slots=True)
class GraphicsSectionData(SectionData):
    """Measured data for a graphics (text + image) section."""

    text_lines: list[str]
    image: PILImage
    alt_text: str | None = None
+
+
@dataclass
class RenderContext:
    """Mutable state threaded through one card-rendering pass."""

    result: ParseResult
    """解析结果"""
    card_width: int
    """卡片宽度"""
    content_width: int
    """内容宽度"""
    image: PILImage
    """当前图像"""
    draw: ImageDraw.ImageDraw
    """绘图对象"""
    not_repost: bool = True
    """是否为非转发内容"""
    y_pos: int = 0
    """当前绘制位置(绘制阶段使用)"""
+
+
+class CommonRenderer:
+ """统一的渲染器,将解析结果转换为消息"""
+
    # Card layout constants
    PADDING = 25
    """内边距"""
    AVATAR_SIZE = 80
    """头像大小"""
    AVATAR_TEXT_GAP = 15
    """头像和文字之间的间距"""
    MAX_COVER_WIDTH = 1000
    """封面最大宽度"""
    MAX_COVER_HEIGHT = 800
    """封面最大高度"""
    DEFAULT_CARD_WIDTH = 800
    """默认卡片宽度"""
    MIN_CARD_WIDTH = 400
    """最小卡片宽度"""
    SECTION_SPACING = 15
    """部分间距"""
    NAME_TIME_GAP = 5
    """名称和时间之间的间距"""
    AVATAR_UPSCALE_FACTOR = 2
    """头像圆形框超采样倍数"""

    # Image processing configuration
    MIN_COVER_WIDTH = 300
    """最小封面宽度"""
    MIN_COVER_HEIGHT = 200
    """最小封面高度"""
    MAX_IMAGE_HEIGHT = 800
    """图片最大高度限制"""
    IMAGE_3_GRID_SIZE = 300
    """图片3列网格最大尺寸"""
    IMAGE_2_GRID_SIZE = 400
    """图片2列网格最大尺寸"""
    IMAGE_GRID_SPACING = 4
    """图片网格间距"""
    MAX_IMAGES_DISPLAY = 9
    """最大显示图片数量"""
    IMAGE_GRID_COLS = 3
    """图片网格列数"""

    # Repost rendering configuration
    REPOST_PADDING = 12
    """转发内容内边距"""
    REPOST_SCALE = 0.88
    """转发缩放比例"""

    # Resource file names
    _EMOJIS = "emojis"
    _RESOURCES = "resources"
    _BUTTON_FILENAME = "media_button.png"
    _FONT_FILENAME = "HYSongYunLangHeiW-1.ttf"

    # Colour configuration
    BG_COLOR: ClassVar[Color] = (255, 255, 255)
    """背景色"""
    TEXT_COLOR: ClassVar[Color] = (51, 51, 51)
    """文本色"""
    HEADER_COLOR: ClassVar[Color] = (0, 122, 255)
    """标题色"""
    EXTRA_COLOR: ClassVar[Color] = (136, 136, 136)
    """额外信息色"""
    REPOST_BG_COLOR: ClassVar[Color] = (247, 247, 247)
    """转发背景色"""
    REPOST_BORDER_COLOR: ClassVar[Color] = (230, 230, 230)
    """转发边框色"""

    # Path configuration
    RESOURCES_DIR: ClassVar[Path] = Path(__file__).parent / _RESOURCES
    """资源目录"""
    DEFAULT_FONT_PATH: ClassVar[Path] = RESOURCES_DIR / _FONT_FILENAME
    """默认字体路径"""
    DEFAULT_VIDEO_BUTTON_PATH: ClassVar[Path] = RESOURCES_DIR / _BUTTON_FILENAME
    """默认视频按钮路径"""
+
    def __init__(self, config: AstrBotConfig):
        """Create a renderer bound to the plugin *config*."""
        self.config = config
        self.cache_dir = Path(config["cache_dir"])
        # Emoji CDN source; downloaded emoji are cached under cache_dir/emojis.
        self.EMOJI_SOURCE = EmojiCDNSource(
            base_url=config["emoji_cdn"],
            style=config["emoji_style"],
            cache_dir=self.cache_dir / self._EMOJIS,
            enable_tqdm=True,
        )
        """Emoji Source"""
    @classmethod
    def load_resources(cls):
        """Preload class-level resources: fonts, video button, platform logos."""
        cls._load_fonts()
        cls._load_video_button()
        cls._load_platform_logos()
+
    @classmethod
    def _load_fonts(cls):
        """Preload the bundled font at every configured role size."""

        font_path = cls.DEFAULT_FONT_PATH
        # Build the FontSet shared by all renderer instances
        cls.fontset = FontSet.new(font_path)
        logger.debug(f"加载字体「{font_path.name}」成功")
+
    @classmethod
    def _load_video_button(cls):
        """Preload the play-button overlay and reduce its opacity to 30%."""
        with Image.open(cls.DEFAULT_VIDEO_BUTTON_PATH) as img:
            # convert() copies pixel data, so the image outlives the file handle
            cls.video_button_image: PILImage = img.convert("RGBA")

        # Scale the alpha channel down to 30%
        alpha = cls.video_button_image.split()[-1]  # alpha channel
        alpha = alpha.point(lambda x: int(x * 0.3))
        cls.video_button_image.putalpha(alpha)
+
    @classmethod
    def _load_platform_logos(cls):
        """Preload per-platform logo images that exist under resources/."""
        from .constants import PlatformEnum

        cls.platform_logos: dict[str, PILImage] = {}
        for platform_name in PlatformEnum:
            logo_path = cls.RESOURCES_DIR / f"{platform_name}.png"
            if logo_path.exists():
                with Image.open(logo_path) as img:
                    cls.platform_logos[str(platform_name)] = img.convert("RGBA")
+
+
    async def text(
        self,
        ctx: RenderContext,
        xy: tuple[int, int],
        lines: list[str],
        font: FontInfo,
        fill: Color,
    ) -> int:
        """Draw *lines* (emoji-aware) onto ctx.image starting at *xy*.

        Returns:
            The total pixel height consumed (line_height * number of lines).
        """
        # Apilmoji renders text and emoji together from the configured CDN.
        # NOTE(review): a list is passed where the parameter is named like
        # plain text — confirm against the apilmoji API.
        await Apilmoji.text(
            ctx.image,
            xy,
            lines,
            font.font,
            fill=fill,
            line_height=font.line_height,
            source=self.EMOJI_SOURCE,
        )
        return font.line_height * len(lines)
+
+
    async def render_messages(self, result: ParseResult):
        """Yield messages for *result*: first the rendered card, then media.

        Args:
            result (ParseResult): parse result to render.
        """
        yield [await self.cache_or_render_image(result)]

        # Media contents (videos, images, audio, ...)
        async for message in self.render_contents(result):
            yield message
+
    async def cache_or_render_image(self, result: ParseResult) -> AstrImage:
        """Render the card image once and cache its path on *result*.

        Args:
            result (ParseResult): parse result to render.

        Returns:
            AstrImage: image component pointing at the cached PNG.
        """
        # Render only on first use; subsequent calls reuse the stored path.
        if result.render_image is None:
            image_raw = await self.render_image(result)
            image_path = await self.save_img(image_raw)
            result.render_image = image_path

        return AstrImage(str(result.render_image))
+
+
+ async def save_img(self, raw: bytes) -> Path:
+ """保存图片
+
+ Args:
+ raw (bytes): 图片字节
+
+ Returns:
+ Path: 图片路径
+ """
+ file_name = f"{uuid.uuid4().hex}.png"
+ image_path = self.cache_dir / file_name
+ async with aiofiles.open(image_path, "wb+") as f:
+ await f.write(raw)
+ return image_path
+
    async def render_contents(
        self, result: ParseResult
    ) -> AsyncGenerator[list[BaseMessageComponent], None]:
        """Yield message chains for the media contents of *result*.

        Videos and audio are yielded immediately; images and graphics are
        batched and sent together, optionally as a forward message.

        Args:
            result (ParseResult): parse result

        Raises:
            DownloadException: raised after all messages if any media failed.

        Returns:
            AsyncGenerator[list[BaseMessageComponent], None]: message generator
        """
        failed_count = 0
        forward_segs: list[BaseMessageComponent] = []
        dynamic_segs: list[BaseMessageComponent] = []

        # Iterate this result's contents plus any reposted contents.
        for cont in chain(
            result.contents, result.repost.contents if result.repost else ()
        ):
            try:
                path = await cont.get_path()
            # Keep rendering remaining items, mirroring the previous
            # gather(return_exceptions=True) behavior.
            except (DownloadLimitException, ZeroSizeException):
                # Expected limits (size/duration): skip silently.
                # yield UniMessage(e.message)
                continue
            except DownloadException:
                failed_count += 1
                continue

            match cont:
                case VideoContent():
                    yield [Video(str(path))]
                case AudioContent():
                    yield [Record(str(path))]
                case ImageContent():
                    forward_segs.append(AstrImage(str(path)))
                case DynamicContent():
                    dynamic_segs.append(Video(str(path)))
                case GraphicsContent() as graphics:
                    forward_segs.append(AstrImage(str(path)))
                    if graphics.text:
                        forward_segs.append(Plain(graphics.text))
                    if graphics.alt:
                        forward_segs.append(Plain(graphics.alt))

        if forward_segs:
            if result.text:
                forward_segs.append(Plain(result.text))

            if self.config["forward_contents"] or len(forward_segs) > 4:
                forward_msg = construct_forward_message(
                    forward_segs + dynamic_segs
                )
                yield [forward_msg]
            else:
                yield forward_segs

        # NOTE(review): when the forward branch above fires, dynamic_segs are
        # included there AND re-sent below — confirm the duplication is intended.
        if dynamic_segs:
            yield [construct_forward_message(dynamic_segs)]

        if failed_count > 0:
            message = f"{failed_count} 项媒体下载失败"
            yield [Plain(message)]
            raise DownloadException(message)
+
+ async def render_image(self, result: ParseResult) -> bytes:
+ """使用 PIL 绘制通用社交媒体帖子卡片
+
+ Args:
+ result: 解析结果
+
+ Returns:
+ PNG 图片的字节数据,如果没有足够的内容则返回 None
+ """
+ # 调用内部方法生成图片
+ image = await self._create_card_image(result)
+
+ # 将图片转换为字节
+ output = BytesIO()
+ image.save(output, format="PNG")
+ return output.getvalue()
+
    async def _create_card_image(
        self,
        result: ParseResult,
        not_repost: bool = True,
    ) -> PILImage:
        """Create the card image (internal; called recursively for reposts).

        Args:
            result: parse result to render
            not_repost: True for the top-level card; False when rendering an
                embedded repost (uses the repost background color)

        Returns:
            PIL Image object
        """
        # Fixed card width; the content area excludes horizontal padding.
        card_width = self.DEFAULT_CARD_WIDTH
        content_width = card_width - 2 * self.PADDING

        # Measure every section first so the canvas height is known up front.
        sections = await self._calculate_sections(result, content_width)

        # Total height = sections + outer padding + inter-section spacing.
        card_height = sum(section.height for section in sections)
        card_height += self.PADDING * 2 + self.SECTION_SPACING * (len(sections) - 1)

        # Create the canvas.
        bg_color = self.BG_COLOR if not_repost else self.REPOST_BG_COLOR
        image = Image.new(
            "RGB",
            (card_width, card_height),
            bg_color,
        )

        # Bundle everything the drawing helpers need.
        ctx = RenderContext(
            result=result,
            card_width=card_width,
            content_width=content_width,
            image=image,
            draw=ImageDraw.Draw(image),
            not_repost=not_repost,
            y_pos=self.PADDING,  # start drawing below the top padding
        )
        # Draw each section in order; helpers advance ctx.y_pos as they go.
        await self._draw_sections(ctx, sections)
        return image
+
    @suppress_exception
    def _load_and_resize_cover(
        self,
        cover_path: Path | None,
        content_width: int,
    ) -> PILImage | None:
        """Load the cover image and scale it to the content width.

        Args:
            cover_path: cover file path (may be None or missing)
            content_width: content-area width; the cover is scaled to this
                width so left/right padding stays consistent

        Returns:
            the resized cover, or None when there is no usable cover
        """
        if not cover_path or not cover_path.exists():
            return None

        with Image.open(cover_path) as original_img:
            # Normalize exotic modes (P, L, CMYK, ...) so pasting is safe.
            if original_img.mode not in ("RGB", "RGBA"):
                cover_img = original_img.convert("RGB")
            else:
                cover_img = original_img

            # The cover spans the full content width.
            target_width = content_width

            # Scale while preserving aspect ratio.
            if cover_img.width != target_width:
                scale_ratio = target_width / cover_img.width
                new_width = target_width
                new_height = int(cover_img.height * scale_ratio)

                # Clamp overly tall covers to MAX_COVER_HEIGHT, rescaling width to match.
                if new_height > self.MAX_COVER_HEIGHT:
                    scale_ratio = self.MAX_COVER_HEIGHT / new_height
                    new_height = self.MAX_COVER_HEIGHT
                    new_width = int(new_width * scale_ratio)

                cover_img = cover_img.resize(
                    (new_width, new_height),
                    Image.Resampling.LANCZOS,
                )
            elif cover_img is original_img:
                # No conversion or resize happened: copy, because the original
                # image is closed when the with-block exits.
                cover_img = cover_img.copy()

        return cover_img
+
    @suppress_exception
    def _load_and_process_avatar(self, avatar: Path | None) -> PILImage | None:
        """Load the avatar and crop it to an anti-aliased circle."""
        if not avatar or not avatar.exists():
            return None

        with Image.open(avatar) as original_img:
            # RGBA gives a clean alpha edge for the circular mask.
            if original_img.mode != "RGBA":
                avatar_img = original_img.convert("RGBA")
            else:
                avatar_img = original_img

            # Supersample: upscale first so the mask edge stays smooth.
            scale = self.AVATAR_UPSCALE_FACTOR
            temp_size = self.AVATAR_SIZE * scale
            avatar_img = avatar_img.resize(
                (temp_size, temp_size),
                Image.Resampling.LANCZOS,
            )

            # High-resolution circular mask.
            mask = Image.new("L", (temp_size, temp_size), 0)
            mask_draw = ImageDraw.Draw(mask)
            mask_draw.ellipse((0, 0, temp_size - 1, temp_size - 1), fill=255)

            # Composite the avatar onto a transparent canvas and apply the mask.
            output_avatar = Image.new(
                "RGBA",
                (temp_size, temp_size),
                (0, 0, 0, 0),
            )
            output_avatar.paste(avatar_img, (0, 0))
            output_avatar.putalpha(mask)

            # Downscale back to the target size (anti-aliased).
            output_avatar = output_avatar.resize(
                (self.AVATAR_SIZE, self.AVATAR_SIZE),
                Image.Resampling.LANCZOS,
            )

        return output_avatar
+
    async def _calculate_sections(self, result: ParseResult, content_width: int) -> list[SectionData]:
        """Measure every card section and return them in draw order."""
        sections: list[SectionData] = []

        # 1. Header (avatar / author / timestamp).
        header_section = await self._calculate_header_section(result, content_width)
        if header_section is not None:
            sections.append(header_section)

        # 2. Title.
        if result.title:
            title_lines = self._wrap_text(
                result.title,
                content_width,
                self.fontset.title_font,
            )
            title_height = len(title_lines) * self.fontset.title_font.line_height
            sections.append(TitleSectionData(height=title_height, lines=title_lines))

        # 3. Cover, image grid, or graphics content (mutually exclusive branches).
        if cover_img := self._load_and_resize_cover(
            await result.cover_path,
            content_width=content_width,
        ):
            sections.append(CoverSectionData(height=cover_img.height, cover_img=cover_img))
        elif result.img_contents:
            # No cover but images exist: lay them out as a grid.
            img_grid_section = await self._calculate_image_grid_section(
                result,
                content_width,
            )
            if img_grid_section:
                sections.append(img_grid_section)
        elif result.graphics_contents:
            for graphics_content in result.graphics_contents:
                graphics_section = await self._calculate_graphics_section(
                    graphics_content,
                    content_width,
                )
                if graphics_section:
                    sections.append(graphics_section)

        # 4. Body text.
        if result.text:
            text_lines = self._wrap_text(
                result.text,
                content_width,
                self.fontset.text_font,
            )
            text_height = len(text_lines) * self.fontset.text_font.line_height
            sections.append(TextSectionData(height=text_height, lines=text_lines))

        # 5. Extra information (views, counts, etc.).
        if result.extra_info:
            extra_lines = self._wrap_text(
                result.extra_info,
                content_width,
                self.fontset.extra_font,
            )
            extra_height = len(extra_lines) * self.fontset.extra_font.line_height
            sections.append(ExtraSectionData(height=extra_height, lines=extra_lines))

        # 6. Repost (rendered recursively as its own card).
        if result.repost:
            repost_section = await self._calculate_repost_section(result.repost)
            sections.append(repost_section)

        return sections
+
    @suppress_exception_async
    async def _calculate_graphics_section(
        self, graphics_content: GraphicsContent, content_width: int
    ) -> GraphicsSectionData | None:
        """Measure a graphics (text + image + alt) section.

        Returns None on load failure (suppressed by the decorator).
        """
        # Load the image.
        img_path = await graphics_content.get_path()
        with Image.open(img_path) as original_img:
            # Shrink to the content width if the image is wider.
            if original_img.width > content_width:
                ratio = content_width / original_img.width
                new_height = int(original_img.height * ratio)
                image = original_img.resize(
                    (content_width, new_height),
                    Image.Resampling.LANCZOS,
                )
            else:
                # No scaling needed: copy, since the original closes with the block.
                image = original_img.copy()

        # Wrap the accompanying text, if any.
        text_lines = []
        if graphics_content.text:
            text_lines = self._wrap_text(
                graphics_content.text,
                content_width,
                self.fontset.text_font,
            )

        # Total height: text + image + alt line, plus spacing between present parts.
        text_height = len(text_lines) * self.fontset.text_font.line_height if text_lines else 0
        alt_height = self.fontset.extra_font.line_height if graphics_content.alt else 0
        total_height = text_height + image.height + alt_height
        if text_lines:
            total_height += self.SECTION_SPACING  # gap between text and image
        if graphics_content.alt:
            total_height += self.SECTION_SPACING  # gap between image and alt text

        return GraphicsSectionData(
            height=total_height,
            text_lines=text_lines,
            image=image,
            alt_text=graphics_content.alt,
        )
+
    async def _calculate_header_section(
        self,
        result: ParseResult,
        content_width: int,
    ) -> HeaderSectionData | None:
        """Measure the header (avatar + author name + timestamp).

        Returns None when the result has no author.
        """
        if result.author is None:
            return None

        # Load the avatar (may be None; a placeholder is drawn later).
        avatar_img = self._load_and_process_avatar(await result.author.get_avatar_path())

        # Text area always reserves room for the avatar.
        text_area_width = content_width - (self.AVATAR_SIZE + self.AVATAR_TEXT_GAP)

        # Author name lines.
        name_lines = self._wrap_text(
            result.author.name,
            text_area_width,
            self.fontset.name_font,
        )

        # Publish time. NOTE(review): attribute name 'formartted_datetime' is
        # defined upstream with this spelling (sic).
        time_text = result.formartted_datetime
        time_lines = self._wrap_text(
            time_text,
            text_area_width,
            self.fontset.extra_font,
        )

        # Header height is the taller of the avatar and the text column.
        text_height = len(name_lines) * self.fontset.name_font.line_height
        if time_lines:
            text_height += self.NAME_TIME_GAP + len(time_lines) * self.fontset.extra_font.line_height
        header_height = max(self.AVATAR_SIZE, text_height)

        return HeaderSectionData(
            height=header_height,
            avatar=avatar_img,
            name_lines=name_lines,
            time_lines=time_lines,
            text_height=text_height,
        )
+
+ async def _calculate_repost_section(self, repost: ParseResult) -> RepostSectionData:
+ """计算转发内容的高度和内容(递归调用绘制方法)"""
+ repost_image = await self._create_card_image(repost, False)
+ # 缩放图片
+ scaled_width = int(repost_image.width * self.REPOST_SCALE)
+ scaled_height = int(repost_image.height * self.REPOST_SCALE)
+ repost_image_scaled = repost_image.resize(
+ (scaled_width, scaled_height),
+ Image.Resampling.LANCZOS,
+ )
+
+ return RepostSectionData(
+ height=scaled_height + self.REPOST_PADDING * 2, # 加上转发容器的内边距
+ scaled_image=repost_image_scaled,
+ )
+
+ async def _calculate_image_grid_section(
+ self, result: ParseResult, content_width: int
+ ) -> ImageGridSectionData | None:
+ """计算图片网格部分的高度和内容"""
+ if not result.img_contents:
+ return None
+
+ # 检查是否有超过最大显示数量的图片
+ total_images = len(result.img_contents)
+ has_more = total_images > self.MAX_IMAGES_DISPLAY
+
+ # 如果超过最大显示数量,处理前N张,最后一张显示+N效果
+ if has_more:
+ img_contents = result.img_contents[: self.MAX_IMAGES_DISPLAY]
+ remaining_count = total_images - self.MAX_IMAGES_DISPLAY
+ else:
+ img_contents = result.img_contents[: self.MAX_IMAGES_DISPLAY]
+ remaining_count = 0
+
+ processed_images = []
+ img_count = len(img_contents)
+
+ for img_content in img_contents:
+ img_path = await img_content.get_path()
+ # 使用装饰器保护的方法,失败会返回 None
+ img = await self._load_and_process_grid_image(img_path, content_width, img_count)
+ if img is not None:
+ processed_images.append(img)
+
+ if not processed_images:
+ return None
+
+ # 计算网格布局
+ image_count = len(processed_images)
+
+ if image_count == 1:
+ # 单张图片
+ cols, rows = 1, 1
+ elif image_count in (2, 4):
+ # 2张或4张图片,使用2列布局
+ cols, rows = 2, (image_count + 1) // 2
+ else:
+ # 多张图片,使用3列布局(九宫格)
+ cols = self.IMAGE_GRID_COLS
+ rows = (image_count + cols - 1) // cols
+
+ # 计算高度
+ max_img_height = max(img.height for img in processed_images)
+ if len(processed_images) == 1:
+ # 单张图片
+ grid_height = max_img_height
+ else:
+ # 多张图片:上间距 + (图片 + 间距) * 行数
+ grid_height = self.IMAGE_GRID_SPACING + rows * (max_img_height + self.IMAGE_GRID_SPACING)
+
+ return ImageGridSectionData(
+ height=grid_height,
+ images=processed_images,
+ cols=cols,
+ rows=rows,
+ has_more=has_more,
+ remaining_count=remaining_count,
+ )
+
    @suppress_exception_async
    async def _load_and_process_grid_image(
        self,
        img_path: Path,
        content_width: int,
        img_count: int,
    ) -> PILImage | None:
        """Load and prepare one image for the grid.

        Args:
            img_path: image file path
            content_width: available content width
            img_count: total number of grid images (decides square-cropping
                and target tile size)

        Returns:
            the processed image, or None on failure
        """
        if not img_path.exists():
            return None

        with Image.open(img_path) as original_img:
            img = original_img

            # Two or more images: crop each to a square tile for a uniform grid.
            if img_count >= 2:
                img = self._crop_to_square(img)

            if img_count == 1:
                # Single image: fit to the card width, consistent with video covers.
                max_width = content_width
                max_height = min(self.MAX_IMAGE_HEIGHT, content_width)  # cap the height
                if img.width > max_width or img.height > max_height:
                    ratio = min(max_width / img.width, max_height / img.height)
                    new_size = (int(img.width * ratio), int(img.height * ratio))
                    img = img.resize(new_size, Image.Resampling.LANCZOS)
                elif img is original_img:
                    # No transform happened: copy before the original closes.
                    img = img.copy()
            else:
                # Multiple images: compute the tile size per layout.
                if img_count in (2, 4):
                    # 2-column layout has 3 gaps across the row.
                    num_gaps = 3
                    max_size = (content_width - self.IMAGE_GRID_SPACING * num_gaps) // 2
                    max_size = min(max_size, self.IMAGE_2_GRID_SIZE)
                else:
                    # 3-column layout.
                    num_gaps = self.IMAGE_GRID_COLS + 1
                    max_size = (content_width - self.IMAGE_GRID_SPACING * num_gaps) // self.IMAGE_GRID_COLS
                    max_size = min(max_size, self.IMAGE_3_GRID_SIZE)

                # Downscale the tile if it exceeds the target size.
                if img.width > max_size or img.height > max_size:
                    ratio = min(max_size / img.width, max_size / img.height)
                    new_size = (int(img.width * ratio), int(img.height * ratio))
                    img = img.resize(new_size, Image.Resampling.LANCZOS)
                elif img is original_img:
                    # No transform happened: copy before the original closes.
                    img = img.copy()

        return img
+
+ def _crop_to_square(self, img: PILImage) -> PILImage:
+ """将图片裁剪为方形(上下切割)"""
+ width, height = img.size
+
+ if width == height:
+ return img
+
+ if width > height:
+ # 宽图片,左右切割
+ left = (width - height) // 2
+ right = left + height
+ return img.crop((left, 0, right, height))
+ else:
+ # 高图片,上下切割
+ top = (height - width) // 2
+ bottom = top + width
+ return img.crop((0, top, width, bottom))
+
+ async def _draw_sections(self, ctx: RenderContext, sections: list[SectionData]) -> None:
+ """绘制所有内容到画布上"""
+ for section in sections:
+ match section:
+ case HeaderSectionData() as header:
+ await self._draw_header(ctx, header)
+ case TitleSectionData() as title:
+ await self._draw_title(ctx, title.lines)
+ case CoverSectionData() as cover:
+ self._draw_cover(ctx, cover.cover_img)
+ case TextSectionData() as text:
+ await self._draw_text(ctx, text.lines)
+ case GraphicsSectionData() as graphics:
+ await self._draw_graphics(ctx, graphics)
+ case ExtraSectionData() as extra:
+ await self._draw_extra(ctx, extra.lines)
+ case RepostSectionData() as repost:
+ self._draw_repost(ctx, repost)
+ case ImageGridSectionData() as image_grid:
+ self._draw_image_grid(ctx, image_grid)
+
    def _create_avatar_placeholder(self) -> PILImage:
        """Create a default circular avatar placeholder (gray user silhouette)."""
        # Placeholder drawing constants.
        placeholder_bg_color = (230, 230, 230, 255)
        placeholder_fg_color = (200, 200, 200, 255)
        head_ratio = 0.35  # vertical position of the head center
        head_radius_ratio = 1 / 6  # head radius relative to avatar size
        shoulder_y_ratio = 0.55  # vertical position of the shoulders
        shoulder_width_ratio = 0.55  # shoulder width relative to avatar size
        shoulder_height_ratio = 0.6  # shoulder height relative to avatar size

        placeholder = Image.new(
            "RGBA",
            (self.AVATAR_SIZE, self.AVATAR_SIZE),
            (0, 0, 0, 0),
        )
        draw = ImageDraw.Draw(placeholder)

        # Circular background.
        draw.ellipse(
            (0, 0, self.AVATAR_SIZE - 1, self.AVATAR_SIZE - 1),
            fill=placeholder_bg_color,
        )

        # Simple user icon: a round head plus shoulders.
        center_x = self.AVATAR_SIZE // 2

        # Head circle.
        head_radius = int(self.AVATAR_SIZE * head_radius_ratio)
        head_y = int(self.AVATAR_SIZE * head_ratio)
        draw.ellipse(
            (
                center_x - head_radius,
                head_y - head_radius,
                center_x + head_radius,
                head_y + head_radius,
            ),
            fill=placeholder_fg_color,
        )

        # Shoulders.
        shoulder_y = int(self.AVATAR_SIZE * shoulder_y_ratio)
        shoulder_width = int(self.AVATAR_SIZE * shoulder_width_ratio)
        shoulder_height = int(self.AVATAR_SIZE * shoulder_height_ratio)
        draw.ellipse(
            (
                center_x - shoulder_width // 2,
                shoulder_y,
                center_x + shoulder_width // 2,
                shoulder_y + shoulder_height,
            ),
            fill=placeholder_fg_color,
        )

        # Circular mask keeps the silhouette inside the avatar bounds.
        mask = Image.new("L", (self.AVATAR_SIZE, self.AVATAR_SIZE), 0)
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.ellipse((0, 0, self.AVATAR_SIZE - 1, self.AVATAR_SIZE - 1), fill=255)

        # Apply the mask.
        placeholder.putalpha(mask)
        return placeholder
+
    async def _draw_header(self, ctx: RenderContext, section: HeaderSectionData) -> None:
        """Draw the header: avatar, author name, time, and (top-level only) platform logo."""
        x_pos = self.PADDING

        # Avatar, or the generated placeholder when none was loaded.
        avatar = section.avatar if section.avatar else self._create_avatar_placeholder()
        ctx.image.paste(avatar, (x_pos, ctx.y_pos), avatar)

        # Text always starts after the reserved avatar column.
        text_x = self.PADDING + self.AVATAR_SIZE + self.AVATAR_TEXT_GAP

        # Vertically center the text column on the avatar's midline.
        avatar_center = ctx.y_pos + self.AVATAR_SIZE // 2
        text_start_y = avatar_center - section.text_height // 2
        text_y = text_start_y

        # Author name (header color).
        text_y += await self.text(
            ctx,
            (text_x, text_y),
            section.name_lines,
            self.fontset.name_font,
            fill=self.HEADER_COLOR,
        )

        # Timestamp (gray), below the name.
        if section.time_lines:
            text_y += self.NAME_TIME_GAP
            text_y += await self.text(
                ctx,
                (text_x, text_y),
                section.time_lines,
                self.fontset.extra_font,
                fill=self.EXTRA_COLOR,
            )

        # Platform logo on the right, only for the top-level (non-repost) card.
        if ctx.not_repost:
            platform_name = ctx.result.platform.name
            if platform_name in self.platform_logos:
                logo_img = self.platform_logos[platform_name]
                # Right-align the logo inside the padding.
                logo_x = ctx.image.width - self.PADDING - logo_img.width
                # Vertically center it on the avatar.
                logo_y = ctx.y_pos + (self.AVATAR_SIZE - logo_img.height) // 2
                ctx.image.paste(logo_img, (logo_x, logo_y), logo_img)

        ctx.y_pos += section.height + self.SECTION_SPACING
+
+ async def _draw_title(self, ctx: RenderContext, lines: list[str]) -> None:
+ """绘制标题"""
+ ctx.y_pos += await self.text(
+ ctx,
+ (self.PADDING, ctx.y_pos),
+ lines,
+ self.fontset.title_font,
+ self.TEXT_COLOR,
+ )
+
+ ctx.y_pos += self.SECTION_SPACING
+
    def _draw_cover(self, ctx: RenderContext, cover_img: PILImage) -> None:
        """Draw the cover image plus the centered play-button overlay."""
        # Cover starts at the left padding, aligned with text and avatar.
        x_pos = self.PADDING
        ctx.image.paste(cover_img, (x_pos, ctx.y_pos))

        # Centered play button (fixed 128x128 overlay).
        button_size = 128
        button_x = x_pos + (cover_img.width - button_size) // 2
        button_y = ctx.y_pos + (cover_img.height - button_size) // 2
        ctx.image.paste(
            self.video_button_image,
            (button_x, button_y),
            self.video_button_image,
        )

        ctx.y_pos += cover_img.height + self.SECTION_SPACING
+
+ async def _draw_text(self, ctx: RenderContext, lines: list[str]) -> None:
+ """绘制文本内容"""
+ ctx.y_pos += await self.text(
+ ctx,
+ (self.PADDING, ctx.y_pos),
+ lines,
+ self.fontset.text_font,
+ fill=self.TEXT_COLOR,
+ )
+ ctx.y_pos += self.SECTION_SPACING
+
    async def _draw_graphics(self, ctx: RenderContext, section: GraphicsSectionData) -> None:
        """Draw a graphics section: optional text, the image, then optional alt text."""
        # Text above the image, if present.
        if section.text_lines:
            ctx.y_pos += await self.text(
                ctx,
                (self.PADDING, ctx.y_pos),
                section.text_lines,
                self.fontset.text_font,
                fill=self.TEXT_COLOR,
            )
            ctx.y_pos += self.SECTION_SPACING  # gap between text and image

        # Image, horizontally centered within the content area.
        x_pos = self.PADDING + (ctx.content_width - section.image.width) // 2
        ctx.image.paste(section.image, (x_pos, ctx.y_pos))
        ctx.y_pos += section.image.height

        # Alt text below the image, centered.
        if section.alt_text:
            ctx.y_pos += self.SECTION_SPACING  # gap between image and alt text
            # Center the single alt line horizontally.
            extra_font_info = self.fontset.extra_font
            text_width = extra_font_info.get_text_width(section.alt_text)
            text_x = self.PADDING + (ctx.content_width - text_width) // 2
            ctx.y_pos += await self.text(
                ctx,
                (text_x, ctx.y_pos),
                [section.alt_text],
                self.fontset.extra_font,
                fill=self.EXTRA_COLOR,
            )

        ctx.y_pos += self.SECTION_SPACING
+
+ async def _draw_extra(self, ctx: RenderContext, lines: list[str]) -> None:
+ """绘制额外信息"""
+ ctx.y_pos += await self.text(
+ ctx,
+ (self.PADDING, ctx.y_pos),
+ lines,
+ self.fontset.extra_font,
+ fill=self.EXTRA_COLOR,
+ )
+
    def _draw_repost(self, ctx: RenderContext, section: RepostSectionData) -> None:
        """Draw the embedded repost card inside a rounded, bordered container."""
        # Pre-scaled repost card image.
        repost_image = section.scaled_image

        # The container spans the full content width.
        repost_x = self.PADDING
        repost_y = ctx.y_pos
        repost_width = ctx.content_width
        repost_height = section.height

        # Rounded background behind the repost.
        self._draw_rounded_rectangle(
            ctx.image,
            (repost_x, repost_y, repost_x + repost_width, repost_y + repost_height),
            self.REPOST_BG_COLOR,
            radius=8,
        )

        # Rounded border around the container.
        self._draw_rounded_rectangle_border(
            ctx.draw,
            (repost_x, repost_y, repost_x + repost_width, repost_y + repost_height),
            self.REPOST_BORDER_COLOR,
            radius=8,
            width=1,
        )

        # Center the repost card horizontally inside the container.
        card_x = repost_x + (repost_width - repost_image.width) // 2
        card_y = repost_y + self.REPOST_PADDING

        # Paste the scaled repost card onto the main canvas.
        ctx.image.paste(repost_image, (card_x, card_y))

        ctx.y_pos += repost_height + self.SECTION_SPACING
+
    def _draw_image_grid(self, ctx: RenderContext, section: ImageGridSectionData) -> None:
        """Draw the image grid, including the "+N" overlay when images were truncated."""
        images = section.images
        cols = section.cols
        rows = section.rows
        has_more = section.has_more
        remaining_count = section.remaining_count

        if not images:
            return

        # Tile sizing and spacing.
        available_width = ctx.content_width
        img_spacing = self.IMAGE_GRID_SPACING

        if len(images) == 1:
            # Single image uses the full content width (matches video covers).
            max_img_size = available_width
        else:
            # Multi-image grid: uniform gaps on both sides and between columns.
            num_gaps = cols + 1  # 2 columns -> 3 gaps, 3 columns -> 4 gaps
            calculated_size = (available_width - img_spacing * num_gaps) // cols
            max_img_size = self.IMAGE_2_GRID_SIZE if cols == 2 else self.IMAGE_3_GRID_SIZE
            max_img_size = min(calculated_size, max_img_size)

        current_y = ctx.y_pos

        for row in range(rows):
            row_start = row * cols
            row_end = min(row_start + cols, len(images))
            row_images = images[row_start:row_end]

            # Row height follows the tallest tile in the row.
            max_height = max(img.height for img in row_images)

            # Draw the row's tiles.
            for i, img in enumerate(row_images):
                # Uniform gap-based horizontal placement.
                img_x = self.PADDING + img_spacing + i * (max_img_size + img_spacing)

                img_y = current_y + img_spacing  # leading gap above every row

                # Vertically center shorter tiles within the row.
                y_offset = (max_height - img.height) // 2
                ctx.image.paste(img, (img_x, img_y + y_offset))

                # On the very last displayed tile, overlay "+N" if images were truncated.
                if has_more and row == rows - 1 and i == len(row_images) - 1 and len(images) == self.MAX_IMAGES_DISPLAY:
                    self._draw_more_indicator(
                        ctx.image,
                        img_x,
                        img_y,
                        max_img_size,
                        max_height,
                        remaining_count,
                    )

            current_y += img_spacing + max_height

        ctx.y_pos = current_y + img_spacing + self.SECTION_SPACING
+
    def _draw_more_indicator(
        self,
        image: PILImage,
        img_x: int,
        img_y: int,
        img_width: int,
        img_height: int,
        count: int,
    ):
        """Overlay a dimming mask and a "+N" counter on the last grid tile."""
        draw = ImageDraw.Draw(image)

        # Semi-transparent black mask (alpha 100/255 ≈ 39% opacity).
        overlay = Image.new("RGBA", (img_width, img_height), (0, 0, 0, 0))
        overlay_draw = ImageDraw.Draw(overlay)
        overlay_draw.rectangle((0, 0, img_width - 1, img_height - 1), fill=(0, 0, 0, 100))

        # Paste the mask using itself as the alpha mask.
        image.paste(overlay, (img_x, img_y), overlay)

        # "+N" label.
        text = f"+{count}"
        font_info = self.fontset.indicator_font
        # Center the label inside the tile.
        text_width = font_info.get_text_width(text)
        text_x = img_x + (img_width - text_width) // 2
        text_y = img_y + (img_height - font_info.line_height) // 2

        # Solid white text (fully opaque).
        draw.text((text_x, text_y), text, fill=(255, 255, 255), font=font_info.font)
+
    def _draw_rounded_rectangle(
        self,
        image: PILImage,
        bbox: tuple[int, int, int, int],
        fill_color: Color,
        radius: int = 8,
    ):
        """Draw a filled rounded rectangle onto *image*."""
        x1, y1, x2, y2 = bbox
        draw = ImageDraw.Draw(image)

        # Two overlapping rectangles cover the body except the corners.
        draw.rectangle((x1 + radius, y1, x2 - radius, y2), fill=fill_color)
        draw.rectangle((x1, y1 + radius, x2, y2 - radius), fill=fill_color)

        # Four quarter-circle corners.
        draw.pieslice((x1, y1, x1 + 2 * radius, y1 + 2 * radius), 180, 270, fill=fill_color)
        draw.pieslice((x2 - 2 * radius, y1, x2, y1 + 2 * radius), 270, 360, fill=fill_color)
        draw.pieslice((x1, y2 - 2 * radius, x1 + 2 * radius, y2), 90, 180, fill=fill_color)
        draw.pieslice((x2 - 2 * radius, y2 - 2 * radius, x2, y2), 0, 90, fill=fill_color)
+
    def _draw_rounded_rectangle_border(
        self,
        draw: ImageDraw.ImageDraw,
        bbox: tuple[int, int, int, int],
        border_color: Color,
        radius: int = 8,
        width: int = 1,
    ):
        """Draw a rounded rectangle border (outline only)."""
        x1, y1, x2, y2 = bbox

        # Straight edges between the corner arcs.
        draw.rectangle((x1 + radius, y1, x2 - radius, y1 + width), fill=border_color)  # top
        draw.rectangle((x1 + radius, y2 - width, x2 - radius, y2), fill=border_color)  # bottom
        draw.rectangle((x1, y1 + radius, x1 + width, y2 - radius), fill=border_color)  # left
        draw.rectangle((x2 - width, y1 + radius, x2, y2 - radius), fill=border_color)  # right

        # Four corner arcs.
        draw.arc(
            (x1, y1, x1 + 2 * radius, y1 + 2 * radius),
            180,
            270,
            fill=border_color,
            width=width,
        )
        draw.arc(
            (x2 - 2 * radius, y1, x2, y1 + 2 * radius),
            270,
            360,
            fill=border_color,
            width=width,
        )
        draw.arc(
            (x1, y2 - 2 * radius, x1 + 2 * radius, y2),
            90,
            180,
            fill=border_color,
            width=width,
        )
        draw.arc(
            (x2 - 2 * radius, y2 - 2 * radius, x2, y2),
            0,
            90,
            fill=border_color,
            width=width,
        )
+
+ def _wrap_text(self, text: str | None, max_width: int, font_info: FontInfo) -> list[str]:
+ """优化的文本自动换行算法,考虑中英文字符宽度相同
+
+ Args:
+ text: 要处理的文本
+ max_width: 最大宽度(像素)
+ font_info: 字体信息对象
+
+ Returns:
+ 换行后的文本列表
+ """
+ if not text:
+ return []
+
+ lines: list[str] = []
+ paragraphs = text.splitlines()
+
+ def is_punctuation(char: str) -> bool:
+ """判断是否为不能为行首的标点符号"""
+ return char in ",。!?;:、)】》〉」』〕〗〙〛…—·" or char in ",.;:!?)]}"
+
+ for paragraph in paragraphs:
+ if not paragraph:
+ lines.append("")
+ continue
+
+ current_line = ""
+ current_line_width = 0
+ remaining_text = paragraph
+
+ while remaining_text:
+ next_char = remaining_text[0]
+ char_width = font_info.get_char_width_fast(next_char)
+ # 如果当前行为空,直接添加字符
+ if not current_line:
+ current_line = next_char
+ current_line_width = char_width
+ remaining_text = remaining_text[1:]
+ continue
+
+ # 如果是标点符号,直接添加到当前行(标点符号不应该单独成行)
+ if is_punctuation(next_char):
+ current_line += next_char
+ current_line_width += char_width
+ remaining_text = remaining_text[1:]
+ continue
+
+ # 测试添加下一个字符后的宽度
+ test_width = current_line_width + char_width
+
+ if test_width <= max_width:
+ # 宽度合适,继续添加
+ current_line += next_char
+ current_line_width = test_width
+ remaining_text = remaining_text[1:]
+ else:
+ # 宽度超限,需要断行
+ lines.append(current_line)
+ current_line = next_char
+ current_line_width = char_width
+ remaining_text = remaining_text[1:]
+
+ # 保存最后一行
+ if current_line:
+ lines.append(current_line)
+
+ return lines
diff --git a/core/resources/HYSongYunLangHeiW-1.ttf b/core/resources/HYSongYunLangHeiW-1.ttf
new file mode 100644
index 0000000..79ed7df
Binary files /dev/null and b/core/resources/HYSongYunLangHeiW-1.ttf differ
diff --git a/core/resources/bilibili.png b/core/resources/bilibili.png
new file mode 100644
index 0000000..00f03f4
Binary files /dev/null and b/core/resources/bilibili.png differ
diff --git a/core/resources/douyin.png b/core/resources/douyin.png
new file mode 100644
index 0000000..357b2d6
Binary files /dev/null and b/core/resources/douyin.png differ
diff --git a/core/resources/kuaishou.png b/core/resources/kuaishou.png
new file mode 100644
index 0000000..f1689b0
Binary files /dev/null and b/core/resources/kuaishou.png differ
diff --git a/core/resources/media_button.png b/core/resources/media_button.png
new file mode 100644
index 0000000..b57717f
Binary files /dev/null and b/core/resources/media_button.png differ
diff --git a/core/resources/tiktok.png b/core/resources/tiktok.png
new file mode 100644
index 0000000..138f500
Binary files /dev/null and b/core/resources/tiktok.png differ
diff --git a/core/resources/twitter.png b/core/resources/twitter.png
new file mode 100644
index 0000000..d37c609
Binary files /dev/null and b/core/resources/twitter.png differ
diff --git a/core/resources/weibo.png b/core/resources/weibo.png
new file mode 100644
index 0000000..bd2f545
Binary files /dev/null and b/core/resources/weibo.png differ
diff --git a/core/resources/xiaohongshu.png b/core/resources/xiaohongshu.png
new file mode 100644
index 0000000..621987c
Binary files /dev/null and b/core/resources/xiaohongshu.png differ
diff --git a/core/resources/youtube.png b/core/resources/youtube.png
new file mode 100644
index 0000000..1997bb2
Binary files /dev/null and b/core/resources/youtube.png differ
diff --git a/core/utils.py b/core/utils.py
new file mode 100644
index 0000000..997ebd6
--- /dev/null
+++ b/core/utils.py
@@ -0,0 +1,285 @@
+import asyncio
+import hashlib
+import re
+from collections import OrderedDict
+from http import cookiejar
+from pathlib import Path
+from typing import TypeVar
+from urllib.parse import urlparse
+
+from astrbot.api import logger
+from astrbot.core.message.components import BaseMessageComponent, Node, Nodes
+
+K = TypeVar("K")
+V = TypeVar("V")
+
+
class LimitedSizeDict(OrderedDict[K, V]):
    """An OrderedDict that evicts its oldest entry once *max_size* is exceeded."""

    def __init__(self, *args, max_size=20, **kwargs):
        # Store the cap before delegating, so __setitem__ can enforce it
        # even for items passed to the constructor.
        self.max_size = max_size
        super().__init__(*args, **kwargs)

    def __setitem__(self, key: K, value: V):
        super().__setitem__(key, value)
        # Evict in FIFO order whenever the capacity is exceeded.
        while len(self) > self.max_size:
            self.popitem(last=False)
+
+
def construct_forward_message(
    chain: list[BaseMessageComponent],
    user_id: str | None = None,
) -> Nodes:
    """Wrap each segment of *chain* in its own forward node.

    Args:
        chain (list[BaseMessageComponent]): message segments to forward
        user_id (str): sender id shown on the nodes; a placeholder is used when None

    Returns:
        Nodes: the assembled forward component
    """
    sender = user_id if user_id is not None else "114514"
    nodes = Nodes([])
    nodes.nodes.extend(
        Node(uin=sender, name="astrbot", content=[seg]) for seg in chain
    )
    return nodes
+
def keep_zh_en_num(text: str) -> str:
    """Keep only CJK characters, ASCII letters, digits, '-' and '_'.

    Spaces are converted to underscores before filtering.
    """
    underscored = text.replace(" ", "_")
    return re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9\-_]", "", underscored)
+
+
async def safe_unlink(path: Path):
    """Delete *path* without raising; a failure is only logged.

    Args:
        path (Path): file to remove (a missing file is ignored)
    """
    try:
        # unlink is blocking I/O, so run it in a worker thread.
        await asyncio.to_thread(path.unlink, missing_ok=True)
    except Exception:
        logger.warning(f"删除 {path} 失败")
+
+
async def exec_ffmpeg_cmd(cmd: list[str]) -> None:
    """Run an ffmpeg command and raise on failure.

    Args:
        cmd (list[str]): full argument vector, e.g. ["ffmpeg", "-y", ...]

    Raises:
        RuntimeError: if the binary is missing or exits with a non-zero code.
    """
    try:
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await process.communicate()
        return_code = process.returncode
    except FileNotFoundError as e:
        # Chain the original error so the missing-binary cause stays visible.
        raise RuntimeError("ffmpeg 未安装或无法找到可执行文件") from e

    if return_code != 0:
        # Decode defensively: ffmpeg output is not guaranteed to be valid UTF-8.
        error_msg = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"ffmpeg 执行失败: {error_msg}")
+
+
async def merge_av(
    *,
    v_path: Path,
    a_path: Path,
    output_path: Path,
) -> None:
    """Mux a video stream and an audio stream into one file (stream copy).

    Both source files are deleted after a successful merge.

    Args:
        v_path (Path): video file path
        a_path (Path): audio file path
        output_path (Path): output file path

    Raises:
        RuntimeError: if ffmpeg is missing or fails.
    """
    logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name}")

    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(v_path),
        "-i",
        str(a_path),
        "-c",
        "copy",  # no re-encode: copy both streams as-is
        "-map",
        "0:v:0",
        "-map",
        "1:a:0",
        str(output_path),
    ]

    await exec_ffmpeg_cmd(cmd)
    await asyncio.gather(safe_unlink(v_path), safe_unlink(a_path))
    logger.info(f"Merged {output_path.name}, {fmt_size(output_path)}")
+
+
async def merge_av_h264(
    *,
    v_path: Path,
    a_path: Path,
    output_path: Path,
) -> None:
    """Mux video and audio, re-encoding the video to H.264 and the audio to AAC.

    Both source files are deleted after a successful merge.

    Args:
        v_path (Path): video file path
        a_path (Path): audio file path
        output_path (Path): output file path

    Raises:
        RuntimeError: if ffmpeg is missing or fails.
    """
    logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name} with H.264")

    # Force H.264 video output for maximum player compatibility.
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(v_path),
        "-i",
        str(a_path),
        "-c:v",
        "libx264",  # explicit H.264 video codec
        "-preset",
        "medium",  # balance of encode speed and quality
        "-crf",
        "23",  # quality factor; lower = higher quality
        "-c:a",
        "aac",  # AAC audio codec
        "-b:a",
        "128k",  # audio bitrate
        "-map",
        "0:v:0",
        "-map",
        "1:a:0",
        str(output_path),
    ]

    await exec_ffmpeg_cmd(cmd)
    await asyncio.gather(safe_unlink(v_path), safe_unlink(a_path))
    logger.info(f"Merged {output_path.name} with H.264, {fmt_size(output_path)}")
+
+
async def encode_video_to_h264(video_path: Path) -> Path:
    """Re-encode a video to H.264, deleting the original on success.

    Args:
        video_path (Path): source video path

    Returns:
        Path: path of the re-encoded video (reused if it already exists)
    """
    output_path = video_path.with_name(f"{video_path.stem}_h264{video_path.suffix}")
    # Skip the work if a previous run already produced the file.
    if output_path.exists():
        return output_path
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(video_path),
        "-c:v",
        "libx264",
        "-preset",
        "medium",
        "-crf",
        "23",
        str(output_path),
    ]
    await exec_ffmpeg_cmd(cmd)
    logger.info(f"视频重新编码为 H.264 成功: {output_path}, {fmt_size(output_path)}")
    await safe_unlink(video_path)
    return output_path
+
+
def fmt_size(file_path: Path) -> str:
    """Format a file's size in MB for log messages.

    Args:
        file_path (Path): file to measure

    Returns:
        str: human-readable size string, e.g. "大小: 1.23 MB"
    """
    return f"大小: {file_path.stat().st_size / 1024 / 1024:.2f} MB"
+
+
def generate_file_name(url: str, default_suffix: str = "") -> str:
    """Derive a stable cache file name from *url*.

    The name is the first 16 hex chars of the url's md5 digest plus the url
    path's suffix (or *default_suffix* when the path has none).

    Args:
        url (str): source url
        default_suffix (str): fallback extension. Defaults to "".

    Returns:
        str: generated file name
    """
    suffix = Path(urlparse(url).path).suffix or default_suffix
    digest = hashlib.md5(url.encode()).hexdigest()[:16]
    return f"{digest}{suffix}"
+
+
+
def save_cookies_with_netscape(cookies_str: str, file_path: Path, domain: str):
    """Save a "k=v; k2=v2" cookie string to *file_path* in Netscape format.

    Args:
        cookies_str: cookie pairs separated by ';'
        file_path: destination file
        domain: cookie domain (stored as ".domain")
    """
    # MozillaCookieJar writes the Netscape cookies.txt format yt-dlp expects.
    cj = cookiejar.MozillaCookieJar(file_path)

    for cookie in cookies_str.split(";"):
        cookie = cookie.strip()
        # Skip empty segments (e.g. a trailing ';') and malformed pairs
        # instead of raising ValueError on the unpack.
        if not cookie or "=" not in cookie:
            continue
        name, value = cookie.split("=", 1)
        cj.set_cookie(
            cookiejar.Cookie(
                version=0,
                name=name,
                value=value,
                port=None,
                port_specified=False,
                domain="." + domain,
                domain_specified=True,
                domain_initial_dot=False,
                path="/",
                path_specified=True,
                secure=True,
                expires=0,
                discard=True,
                comment=None,
                comment_url=None,
                rest={"HttpOnly": ""},
                rfc2109=False,
            )
        )

    # Persist even session/discard cookies so the file is complete.
    cj.save(ignore_discard=True, ignore_expires=True)
+
+
def ck2dict(cookies_str: str) -> dict[str, str]:
    """Parse a "k=v; k2=v2" cookie string into a dict.

    Empty or malformed segments (no '=') are skipped instead of raising
    ValueError, so trailing ';' and stray whitespace are tolerated.

    Args:
        cookies_str: cookie string

    Returns:
        dict[str, str]: name -> value mapping
    """
    result: dict[str, str] = {}
    for cookie in cookies_str.split(";"):
        cookie = cookie.strip()
        if not cookie or "=" not in cookie:
            continue
        name, value = cookie.split("=", 1)
        result[name] = value
    return result
diff --git a/logo.png b/logo.png
new file mode 100644
index 0000000..52e91ef
Binary files /dev/null and b/logo.png differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..916d147
--- /dev/null
+++ b/main.py
@@ -0,0 +1,231 @@
+# main.py
+
+import re
+from pathlib import Path
+
+from astrbot.api import logger
+from astrbot.api.event import filter
+from astrbot.api.star import Context, Star, StarTools, register
+from astrbot.core import AstrBotConfig
+from astrbot.core.message.components import Image, Record
+from astrbot.core.platform.astr_message_event import AstrMessageEvent
+
+from .core.clean import CacheCleaner
+from .core.download import Downloader
+from .core.parsers import BaseParser, BilibiliParser, ParseResult, YouTubeParser
+from .core.render import CommonRenderer
+from .core.utils import save_cookies_with_netscape
+
+
@register("astrbot_plugin_parser", "Zhalslar", "...", "...")
class ParserPlugin(Star):
    """Link-parser plugin entry point.

    Watches every incoming message for platform keywords + URL patterns,
    dispatches matches to the registered parsers, and renders the parsed
    media back into the session. Also exposes admin commands for audio
    download, Bilibili QR login, and per-session enable/disable.
    """

    def __init__(self, context: Context, config: AstrBotConfig) -> None:
        super().__init__(context)
        self.context = context
        self.config = config

        # Plugin data directory; path is written back into config so
        # sub-components constructed from config can locate it.
        self.data_dir: Path = StarTools.get_data_dir("astrbot_plugin_parser")
        config["data_dir"] = str(self.data_dir)

        # Cache directory for downloaded media.
        self.cache_dir: Path = self.data_dir / "cache_dir"
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        config["cache_dir"] = str(self.cache_dir)

        # YouTube cookies: if a raw cookie string is configured, persist it
        # in Netscape format (the cookies.txt layout yt-dlp consumes).
        if self.config["ytb_ck"]:
            ytb_cookies_file = self.data_dir / "ytb_cookies.txt"
            ytb_cookies_file.parent.mkdir(parents=True, exist_ok=True)
            save_cookies_with_netscape(
                self.config["ytb_ck"],
                ytb_cookies_file,
                "youtube.com",
            )
            config["ytb_cookies_file"] = str(ytb_cookies_file)

        config.save_config()

        # keyword -> parser instance mapping, filled by register_parser().
        self.parser_map: dict[str, BaseParser] = {}

        # (keyword, compiled pattern) pairs, longest keyword first.
        self.key_pattern_list: list[tuple[str, re.Pattern[str]]] = []

        # Renderer for parsed results.
        self.renderer = CommonRenderer(config)

        # Shared media downloader.
        self.downloader = Downloader(config)

        # Background cache cleaner.
        self.cleaner = CacheCleaner(self.context, self.config)

    async def initialize(self) -> None:
        """Called when the plugin is loaded or reloaded."""
        self.register_parser()
        # Class-level resource load (presumably fonts/templates for the
        # card renderer — defined in CommonRenderer, not visible here).
        CommonRenderer.load_resources()

    def register_parser(self) -> None:
        """Instantiate enabled parsers and build the keyword/pattern tables."""
        # Collect every BaseParser subclass.
        all_subclass = BaseParser.get_all_subclass()
        # Keep only platforms enabled in the config.
        enabled_classes = [
            _cls
            for _cls in all_subclass
            if _cls.platform.display_name in self.config["enable_platforms"]
        ]
        # One shared parser instance per class; every keyword of a class
        # maps to that same instance.
        platform_names = []
        for _cls in enabled_classes:
            parser = _cls(self.config, self.downloader)
            platform_names.append(parser.platform.display_name)
            for keyword, _ in _cls._key_patterns:
                self.parser_map[keyword] = parser
        logger.info(f"启用平台: {'、'.join(platform_names)}")

        # Build the (keyword, pattern) list once, compiling string patterns.
        patterns: list[tuple[str, re.Pattern[str]]] = [
            (kw, re.compile(pt) if isinstance(pt, str) else pt)
            for cls in enabled_classes
            for kw, pt in cls._key_patterns
        ]
        # Longer keywords first, so the most specific keyword wins.
        patterns.sort(key=lambda x: -len(x[0]))
        logger.debug(f"关键词-正则对已生成:{patterns}")
        self.key_pattern_list = patterns

    def get_parser_by_type(self, parser_type: type[BaseParser]) -> BaseParser:
        """Return the first registered parser instance of *parser_type*.

        Raises:
            ValueError: if no parser of that type was registered
        """
        for parser in self.parser_map.values():
            if isinstance(parser, parser_type):
                return parser
        raise ValueError(f"未找到类型为 {parser_type} 的 parser 实例")

    @filter.event_message_type(filter.EventMessageType.ALL)
    async def prob_read_feed(self, event: AstrMessageEvent):
        """Unified entry point: match, parse and render links in any message."""
        umo = event.unified_msg_origin

        # Session disabled via the "关闭解析" command — ignore.
        if umo in self.config["disabled_sessions"]:
            return

        text = event.message_str

        # Nothing to match in an empty message.
        if not text:
            return

        # Two-stage match: cheap substring check on the keyword first,
        # then the regex search only when the keyword is present.
        keyword: str = ""
        searched: re.Match[str] | None = None
        for kw, pat in self.key_pattern_list:
            if kw not in text:
                continue
            if m := pat.search(text):
                keyword, searched = kw, m
                break
        if searched is None:
            return

        logger.debug(f"匹配结果: {keyword}, {searched}")

        # Look up the parser registered for this keyword.
        parser = self.parser_map[keyword]
        # Parse the matched link/ID into a structured result.
        parse_res: ParseResult = await parser.parse(keyword, searched)

        # Render the result into message chains and send them as produced.
        async for chain in self.renderer.render_messages(parse_res):
            yield event.chain_result(chain)  # type: ignore

    @filter.permission_type(filter.PermissionType.ADMIN)
    @filter.command("bm")
    async def bm(self, event: AstrMessageEvent):
        """Download the audio track of a Bilibili video (admin only)."""
        text = event.message_str
        # BV id plus an optional whitespace-separated page number.
        matched = re.search(r"(BV[A-Za-z0-9]{10})(\s\d{1,3})?", text)
        if not matched:
            yield event.plain_result("请发送正确的 BV 号")
            return

        # page_num keeps its leading whitespace; int() tolerates that.
        bvid, page_num = matched.group(1), matched.group(2)
        page_idx = int(page_num) if page_num else 0

        parser: BilibiliParser = self.get_parser_by_type(BilibiliParser)  # type: ignore

        _, audio_url = await parser.extract_download_urls(
            bvid=bvid, page_index=page_idx
        )
        if not audio_url:
            yield event.plain_result("未找到可下载的音频")
            return

        audio_path = await self.downloader.download_audio(
            audio_url, audio_name=f"{bvid}-{page_idx}.mp3", ext_headers=parser.headers
        )
        yield event.chain_result([Record(audio_path)])  # type: ignore

        # TODO: upload the audio file when upload_audio is enabled.
        if self.config["upload_audio"]:
            pass

    @filter.permission_type(filter.PermissionType.ADMIN)
    @filter.command("ym")
    async def ym(self, event: AstrMessageEvent):
        """Download the audio track of a YouTube video (admin only)."""
        text = event.message_str
        parser = self.get_parser_by_type(YouTubeParser)
        _, matched = parser.search_url(text)
        if not matched:
            yield event.plain_result("请发送正确的油管链接")
            return

        url = matched.group(0)

        # YouTube audio goes through yt-dlp rather than a direct URL.
        audio_path = await self.downloader.download_audio(url, use_ytdlp=True)
        yield event.chain_result([Record(audio_path)])  # type: ignore

        # TODO: upload the audio file when upload_audio is enabled.
        if self.config["upload_audio"]:
            pass

    @filter.permission_type(filter.PermissionType.ADMIN)
    @filter.command("登录B站", alias={"blogin", "登录b站"})
    async def login_bilibili(self, event: AstrMessageEvent):
        """Log in to Bilibili by QR code: send the code, then poll its state."""
        parser: BilibiliParser = self.get_parser_by_type(BilibiliParser)  # type: ignore
        qrcode = await parser.login_with_qrcode()
        yield event.chain_result([Image.fromBytes(qrcode)])
        # Relay scan-state updates back to the user as they arrive.
        async for msg in parser.check_qr_state():
            yield event.plain_result(msg)

    @filter.command("开启解析")
    async def open_parser(self, event: AstrMessageEvent):
        """Enable link parsing for the current session."""
        umo = event.unified_msg_origin
        if umo in self.config["disabled_sessions"]:
            self.config["disabled_sessions"].remove(umo)
            self.config.save_config()
            yield event.plain_result("解析已开启")
        else:
            yield event.plain_result("解析已开启,无需重复开启")

    @filter.command("关闭解析")
    async def close_parser(self, event: AstrMessageEvent):
        """Disable link parsing for the current session."""
        umo = event.unified_msg_origin
        if umo not in self.config["disabled_sessions"]:
            self.config["disabled_sessions"].append(umo)
            self.config.save_config()
            yield event.plain_result("解析已关闭")
        else:
            yield event.plain_result("解析已关闭,无需重复关闭")

    async def terminate(self) -> None:
        """Called when the plugin is unloaded; release all resources."""
        # Close the downloader's HTTP session.
        await self.downloader.close()
        # Close every parser's shared session.
        await BaseParser.close_session()
        # Stop the background cache cleaner.
        await self.cleaner.stop()
diff --git a/metadata.yaml b/metadata.yaml
new file mode 100644
index 0000000..d923e29
--- /dev/null
+++ b/metadata.yaml
@@ -0,0 +1,8 @@
+name: astrbot_plugin_parser # 这是你的插件的唯一识别名。
+display_name: 链接解析器
+desc: 高性能低耦合的万能链接解析器。支持的类型:视频、图集、音频。 支持的平台:A站、B站、抖音、tiktok、微博、小红书、快手、油管、推特...
+help: 略 # 插件的帮助信息
+version: v1.0.0 # 插件版本号。格式:v1.1.1 或者 v1.1
+author: Zhalslar # 作者
+repo: https://github.com/Zhalslar/astrbot_plugin_parser # 插件的仓库地址
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1581925
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+# Astrbot已规定的依赖此处不再填写
+
+tqdm>=4.67.1,<5.0.0
+curl_cffi>=0.13.0,<1.0.0
+msgspec>=0.20.0,<1.0.0
+apilmoji[tqdm]>=0.2.3,<1.0.0
+bilibili-api-python>=17.4.0,<18.0.0
+yt-dlp[default]>=2025.11.12