diff --git a/ANALYSIS.md b/ANALYSIS.md new file mode 100644 index 0000000..2ce29fc --- /dev/null +++ b/ANALYSIS.md @@ -0,0 +1,286 @@ +# TeleBox Reverse Engineering Analysis / 逆向分析笔记 + +> Goal: provide a roadmap for building your own version. Current branch: +> `feature/TelegramUserbot/Leeching/V1Telebox`, direction: Telegram Userbot / Leeching / V1Telebox. +> +> 目标:给后续做自己的版本做路线图。当前工作分支为 `feature/TelegramUserbot/Leeching/V1Telebox`,方向是 Telegram Userbot / Leeching / V1Telebox。 + +## 0. Current Pull State / 当前拉取状态 + +| Item / 项目 | Value / 值 | +|---|---| +| Local path / 本地路径 | `C:\Users\user\Desktop\TP AI Agent\TeleBox_reverse\TeleBox` | +| Git remote | `https://github.com/TeleBoxOrg/TeleBox.git` | +| Current branch / 当前分支 | `feature/TelegramUserbot/Leeching/V1Telebox` | +| Current commit | `6902a3513420195aa9b9bd9077ce0ec14163d0ad` | +| Project version / 项目版本 | `telebox@0.2.8` | +| License | `LGPL-2.1-only` | +| Node requirement / Node 要求 | `24.x` | +| Telegram client lib / Telegram 客户端库 | `teleproto ^1.227.0` | + +## 1. Architecture Summary / 一句话架构 + +TeleBox is a **Node.js + TypeScript + teleproto** Telegram UserBot framework: + +TeleBox 是一个 **Node.js + TypeScript + teleproto** 的 Telegram UserBot 框架: + +1. `src/index.ts` — entry point, loads env/logger/patches, starts runtime. / 入口,加载环境变量、logger、patch,启动 runtime。 +2. `src/utils/runtimeManager.ts` — creates `TelegramClient`, restores session, builds `GenerationContext`. / 创建 TelegramClient,恢复 session,建立 GenerationContext。 +3. `src/utils/pluginManager.ts` — dynamically loads plugins, registers commands/events/cron. / 动态加载插件,注册命令/事件/cron。 +4. Plugins extend `Plugin` from `src/utils/pluginBase.ts`, expose `cmdHandlers`. / 插件继承 Plugin,通过 cmdHandlers 暴露命令。 +5. Runtime data stored in `config.json`, `assets/*`, `temp/*`. / 运行期数据存储在 config.json、assets、temp。 + +```mermaid +flowchart TD + A["npm start"] --> B["scripts/run-tsx.cjs"] + B --> C["src/index.ts"] + C --> D["runtimeManager.startRuntime()"] + D --> E["apiConfig.getApiConfig()"] + E --> F["TelegramClient + StringSession"] + F --> G["loginManager.initializeClientSession()"] + G --> H["GenerationContext #N"] + H --> I["pluginManager.loadPluginsForRuntime()"] + I --> J["plugins/*.ts 用户插件"] + I --> K["src/plugin/*.ts 内置插件"] + J --> L["Telegram NewMessage / EditedMessage handlers"] + K --> L +``` + +## 2. 启动与运行时链路 + +### 2.1 入口:`src/index.ts` + +- `import "dotenv/config"`:读取 `.env`。 +- 初始化 `logger`。 +- 引入 `./hook/patches/telegram.patch`:patch HTML parser 和 Message 原型方法。 +- 注册 `unhandledRejection` / `uncaughtException`。 +- 调用 `startRuntime()`。 + +### 2.2 Runtime:`src/utils/runtimeManager.ts` + +核心函数: + +- `createClient()`:读取 `config.json` 中的 `api_id/api_hash/session/proxy`,创建 `TelegramClient(new StringSession(...))`。 +- `buildRuntime()`:创建新的 `GenerationContext`,连接 Telegram,注册断线 watchdog。 +- `startRuntime()`:首次启动。 +- `reloadRuntime()`:完整重载:卸载旧插件 -> drain/销毁旧 client -> 创建新 client/generation -> 重新加载插件。 +- `shutdownRuntime()`:退出清理。 + +关键概念:**GenerationContext 是每次 runtime 的生命周期容器**。写 leech/download/upload 这类长任务时,必须把下载 promise、timer、子进程、listener 都绑定到当前 generation,避免 reload 后旧任务残留。 + +### 2.3 登录与 session + +相关文件: + +- `src/utils/apiConfig.ts` +- `src/utils/loginManager.ts` + +运行后会自动创建/读取 `config.json`: + +```json +{ + "api_id": 123456, + "api_hash": "xxxx", + "session": "xxxx", + "proxy": {} +} +``` + +注意:真实 `config.json` 不能提交。 + +## 3. 插件系统逆向重点 + +### 3.1 插件最小形态 + +```ts +import { Plugin } from "@utils/pluginBase"; +import { Api } from "teleproto"; + +class MyPlugin extends Plugin { + description = "说明"; + + cmdHandlers: Record Promise> = { + mycmd: async (msg) => { + await msg.edit({ text: "ok" }); + }, + }; +} + +export default new MyPlugin(); +``` + +可选字段: + +- `listenMessageHandler(msg, options)`:全局消息监听。 +- `eventHandlers`:直接注册 teleproto event builder。 +- `cronTasks`:定时任务。 +- `setup(context)` / `cleanup()`:生命周期初始化和清理。 +- `ignoreEdited`:命令是否忽略编辑消息,默认来自 `TB_CMD_IGNORE_EDITED`。 + +### 3.2 插件加载顺序 + +`loadPluginsForRuntime()` 的顺序: + +1. 加载 `plugins/*.ts` 用户插件。 +2. 加载 `src/plugin/*.ts` 内置插件。 +3. 对每个插件执行 `setup()`。 +4. 注册根命令监听:`NewMessage` 和 `EditedMessage`。 +5. 注册插件级 `listenMessageHandler`、`eventHandlers`、`cronTasks`。 + +注意:如果用户插件与内置插件命令同名,后加载的内置插件可能覆盖前面的命令映射。要覆盖内置命令时,建议改 `src/plugin` 或调整加载策略。 + +### 3.3 命令触发逻辑 + +`dealCommandPlugin()` 只处理: + +- `msg.out`:自己发出的消息。 +- 或 `savedPeerId`:收藏/保存消息场景。 + +默认前缀: + +- 生产:`.`, `。`, `$` +- 开发模式:`!`, `!` +- 可由 `.env` 的 `TB_PREFIX` 覆盖。 + +## 4. 内置插件地图 + +| 文件 | 主要命令 | 作用 | 逆向价值 | +|---|---|---|---| +| `src/plugin/help.ts` | `help`, `h` | 帮助系统 | 看插件枚举与 description 输出 | +| `src/plugin/alias.ts` | `alias` | 命令别名 | 看 SQLite 与多词命令重写 | +| `src/plugin/prefix.ts` | `prefix` | 设置命令前缀 | 看运行时配置修改 | +| `src/plugin/sudo.ts` | `sudo` | sudo 用户/群权限 | 看权限和代执行模型 | +| `src/plugin/sure.ts` | `sure` | 白名单/确认消息机制 | 看 listener + DB 规则匹配 | +| `src/plugin/re.ts` | `re` | 回复消息复读 | 看 reply message 获取 | +| `src/plugin/debug.ts` | `id`, `entity` | 用户/群/频道/link 解析 | Telegram entity 逆向重点 | +| `src/plugin/ping.ts` | `ping` | API/ICMP/DC 延迟测试 | 看网络诊断与 child process | +| `src/plugin/exec.ts` | `exec` | Telegram 执行 shell | 高风险能力;看子进程生命周期 | +| `src/plugin/status.ts` | `status`, `sysinfo` | 系统状态 | 看运行状态聚合 | +| `src/plugin/reload.ts` | `reload` | 插件/进程重载 | 看 runtime reload 边界 | +| `src/plugin/update.ts` | `update` | git 更新 + npm install | 二开时建议改 remote/branch 逻辑 | +| `src/plugin/tpm.ts` | `tpm` | 远程插件安装/卸载/搜索 | 看插件生态和外部下载 | +| `src/plugin/bf.ts` | `bf`, `hf` | 备份/恢复 plugins + assets | 对 leech 文件打包/上传有参考价值 | +| `src/plugin/sendLog.ts` | `sendlog`, `logs`, `log` | 发送日志文件 | 对文件发送、目标配置有参考价值 | +| `src/plugin/loglevel.ts` | `loglevel` | 调整日志等级 | 看 logger 持久化配置 | + +用户插件例子: + +- `plugins/moyu.ts`:下载外部图片 API -> 封装 `CustomFile` -> `sendFile()` 到 Telegram -> 删除命令消息。 + +## 5. 数据与持久化 + +| 数据 | 文件/目录 | 说明 | +|---|---|---| +| Telegram API/session | `config.json` | 运行后自动创建,不要提交真实 session | +| 环境变量样例 | `.env-sample` | `TB_PREFIX`, `TB_SUDO_PREFIX`, `TB_CMD_IGNORE_EDITED`, `TB_LISTENER_HANDLE_EDITED` | +| alias DB | `assets/alias/alias.db` | better-sqlite3 | +| sudo DB | `assets/sudo/sudo.db` | 用户/群授权 | +| sure DB | `assets/sure/sure.db` | 用户/群/消息规则 | +| sendlog DB | `assets/sendlog/sendlog.db` | 日志发送目标 | +| logger 配置 | `assets/logger/config.json` | lowdb JSON | +| reload 配置 | `assets/reload/config.json` | lowdb JSON | +| tpm 插件索引缓存 | `assets/tpm/plugins.json` | 远程插件索引缓存 | +| 临时文件 | `temp/*` | 下载/解压/运行临时文件建议放这里 | + +## 6. Leeching/V1Telebox V1 落地说明 + +本分支已把 V1 做成内置插件:`src/plugin/leech.ts`,核心工具放在 `src/utils/leech/*`。 + +配套文档: + +- `LEECH_README.md` +- `LEECH_ARCHITECTURE.md` +- `LEECH_FEATURES.md` + +验证脚本: + +- `npm run leech:smoke`:使用 fake Telegram client 验证 SQLite/structured log/date range 保存链路,并覆盖 `.leech login/chat/jobs/stats/db` 插件命令入口。 + +### 6.1 V1 功能边界 + +当前 V1 稳定闭环: + +- `.leech login` / `.leech session`:检查当前 Telegram session。 +- `.leech chat --from YYYY-MM-DD --to YYYY-MM-DD`:按日期范围抓 chat/group/channel 消息。 +- `.leech jobs [limit]`:查看最近任务。 +- `.leech stats`:查看 SQLite 保存统计。 +- `.leech db`:查看本地 DB 路径。 + +消息保存到 `assets/leech/leech.db`,structured log 同时输出到 console 和 `leech_actions` 表。 + +### 6.2 推荐模块拆分 + +```text +src/plugin/leech.ts +src/utils/leech/types.ts +src/utils/leech/dateRange.ts +src/utils/leech/json.ts +src/utils/leech/leechDB.ts +src/utils/leech/structuredLogger.ts +src/utils/leech/targetResolver.ts +src/utils/leech/messageSerializer.ts +src/utils/leech/leechService.ts +``` + +### 6.3 必须复用的现有能力 + +- `GenerationContext` + - 抓取 batch:`lifecycle.runTask(...)` + - timeout/timer:`lifecycle.setTimeout(...)` + - 后续如接外部下载器子进程:`lifecycle.trackChildProcess(...)` +- `pathHelpers` + - 配置/DB:`createDirectoryInAssets("leech")` +- `safeGetMessages.ts` + - 分批读取 Telegram 历史消息并保护已知 getMessages 崩溃。 +- `better-sqlite3` + - 保存 jobs/messages/actions 三类本地数据。 + +### 6.4 Leeching 状态机 + +```mermaid +stateDiagram-v2 + [*] --> running + running --> resolving_target + resolving_target --> fetching_batch + fetching_batch --> saving_batch + saving_batch --> fetching_batch: next batch + saving_batch --> completed: date boundary / no more / limit + running --> failed + resolving_target --> failed + fetching_batch --> failed + saving_batch --> failed +``` + +### 6.5 重点坑位 + +1. **重载后旧任务残留**:所有下载/upload/timer 必须绑定当前 `GenerationContext`。 +2. **命令只处理自己发出的消息**:这是 userbot,不是 bot token bot。 +3. **大范围抓取限制**:Telegram API 有 rate limit,大范围建议分段日期跑。 +4. **DB 体积增长**:`raw_json` 会增加 DB 体积,后续可加压缩/导出/清理策略。 +5. **不要提交 session**:`config.json`、`assets/*.db`、`temp/*` 不要进 git。 +6. **自更新插件会改 git**:`update.ts` 会 fetch/pull/reset;二开分支建议先禁用或改 remote/branch 逻辑。 +7. **exec 插件风险高**:个人版本可保留,公开发布建议默认禁用或加强权限。 +8. **License**:原仓库是 LGPL-2.1-only,fork 改动要保留版权与许可证声明。 + +## 7. 推荐逆向阅读顺序 + +1. `package.json`、`tsconfig.json` +2. `src/index.ts` +3. `src/utils/runtimeManager.ts` +4. `src/utils/apiConfig.ts`、`src/utils/loginManager.ts` +5. `src/utils/pluginBase.ts` +6. `src/utils/pluginManager.ts` +7. `plugins/moyu.ts` +8. `src/plugin/sendLog.ts`、`src/plugin/bf.ts` +9. `src/plugin/tpm.ts` +10. `src/utils/generationContext.ts` +11. `src/plugin/exec.ts`、`src/plugin/reload.ts` + +## 8. 后续本地命令 + +```powershell +cd "C:\Users\user\Desktop\TP AI Agent\TeleBox_reverse\TeleBox" +git status --short --branch +npm install +npm run dev +``` diff --git a/LEECH_ARCHITECTURE.md b/LEECH_ARCHITECTURE.md new file mode 100644 index 0000000..3edd52c --- /dev/null +++ b/LEECH_ARCHITECTURE.md @@ -0,0 +1,304 @@ +# TeleBox Leech V1 架构设计 + +## 1. 目标 + +在 TeleBox 原有 runtime/plugin 架构中加入一个可维护的 leech 模块,实现: + +- 复用 TeleBox Telegram session。 +- 按日期范围抓取 chat/group/channel 消息。 +- 保存到本地 SQLite。 +- 每个关键 action 结构化记录。 +- 插件代码可继续扩展到 media/file leech、导出、增量同步。 + +## 2. 模块图 + +```mermaid +flowchart TD + A["Telegram command: .leech ..."] --> B["src/plugin/leech.ts"] + B --> C["LeechService"] + C --> D["resolveLeechTarget()"] + C --> E["safeGetMessages() / teleproto"] + C --> F["serializeLeechMessage()"] + C --> G["LeechDB"] + C --> H["StructuredLeechLogger"] + G --> I["assets/leech/leech.db"] + H --> J["console JSON log"] + H --> K["leech_actions table"] +``` + +## 3. 文件职责 + +| 文件 | 责任 | +|---|---| +| `src/plugin/leech.ts` | Telegram 命令入口、参数解析、用户反馈 | +| `src/utils/leech/leechService.ts` | 核心 leech 流程:resolve target、分页抓取、保存、状态更新 | +| `src/utils/leech/leechDB.ts` | SQLite schema 与读写封装 | +| `src/utils/leech/structuredLogger.ts` | JSON structured log 输出 + action 持久化 | +| `src/utils/leech/targetResolver.ts` | target 解析:here、@username、数字 ID、t.me link | +| `src/utils/leech/messageSerializer.ts` | Telegram Message -> SQLite row | +| `src/utils/leech/dateRange.ts` | 日期范围解析 | +| `src/utils/leech/json.ts` | 安全 JSON 序列化、ID/number 转换 | +| `src/utils/leech/types.ts` | 类型定义 | +| `scripts/leech-smoke.ts` | 本地 smoke 验证脚本,不连接 Telegram | + +## 4. 数据流 + +1. 用户发送 `.leech chat ...`。 +2. `src/plugin/leech.ts` 解析参数。 +3. `LeechService.runChatLeech()` 创建 `actionId`。 +4. `resolveLeechTarget()` 把输入转换成 Telegram entity。 +5. `LeechDB.createJob()` 创建 job。 +6. 使用 `safeGetMessages()` 分批抓取消息。 +7. 每批消息经过 `serializeLeechMessage()` 转换。 +8. `LeechDB.upsertMessage()` 写入 `leech_messages`。 +9. 每个关键 action 调 `StructuredLeechLogger.log()`: + - console 输出 JSON + - 写入 `leech_actions` +10. 任务完成后更新 `leech_jobs.status = completed`。 + +## 5. 日期范围策略 + +Telegram message history 默认从新到旧读取。 + +Leech V1 使用: + +- `offsetDate = toTs + 1` +- `offsetId = lastMessage.id` +- 循环抓取直到: + - 到达 `fromTs` 边界 + - 没有更多消息 + - 达到 `--limit` + - runtime 被 abort/reload + +`--from YYYY-MM-DD` 会转换为当天 `00:00:00`。 +`--to YYYY-MM-DD` 会转换为当天 `23:59:59.999`。 + +## 6. Lifecycle / reload 设计 + +TeleBox 有 `GenerationContext`,每次 runtime reload 都会创建新 generation。 + +Leech V1 在抓取 batch 时使用: + +```ts +lifecycle.runTask(...) +``` + +这样 runtime reload/shutdown 时可以触发 abort/drain,避免旧抓取任务长期悬挂。 + +## 7. SQLite schema + +```mermaid +erDiagram + leech_jobs ||--o{ leech_messages : "first_job_id / last_job_id" + leech_jobs ||--o{ leech_actions : "job_id" + + leech_jobs { + integer id + text action_id + text target + text chat_id + text chat_title + text chat_type + integer from_ts + integer to_ts + text status + integer saved_count + integer scanned_count + text started_at + text finished_at + text error + } + + leech_messages { + text chat_id + integer message_id + integer date_ts + text date_iso + text sender_id + text message_text + text media_type + text raw_json + } + + leech_actions { + integer id + text action_id + integer job_id + text action + text status + text timestamp + text details_json + } +``` + +## 8. 后续扩展点 + +- 增量同步:记录每个 chat 上次抓取到的 message_id/date。 +- Media 下载:把 photo/document/video 下载到 `temp/leech_media` 或 `assets/leech_media`。 +- 导出:支持 `.leech export csv/json`. +- 查询:支持 `.leech search keyword`. +- 多任务队列:加入 concurrency control 和 cancel job。 + +## 9. 验证策略 + +当前可自动验证的部分: + +```powershell +npx tsc --noEmit +npm run leech:smoke +``` + +- `npx tsc --noEmit` 验证 TypeScript 类型。 +- `npm run leech:smoke` 使用 fake Telegram client 验证: + - 日期范围分页 + - SQLite job/message/action 写入 + - structured log 持久化 + - stats 读取 + - 插件命令入口:`.leech login`、`.leech chat`、`.leech jobs`、`.leech stats`、`.leech db` + +真实 Telegram 抓取仍需要有效 `config.json` session,并在 Telegram 内执行 `.leech chat ...` 做联调。 +# TeleBox Leech V1 Architecture / 架构设计 + +## 1. Goal / 目标 + +Integrate a maintainable leech module into TeleBox's existing runtime/plugin architecture to: + +在 TeleBox 原有 runtime/plugin 架构中加入一个可维护的 leech 模块,实现: + +- Reuse the TeleBox Telegram session (no separate login). / 复用 TeleBox Telegram session(无需单独登录)。 +- Fetch chat/group/channel messages by date range. / 按日期范围抓取 chat/group/channel 消息。 +- Save to local SQLite. / 保存到本地 SQLite。 +- Structured logging for every key action. / 每个关键 action 结构化记录。 +- Extensible to media download, export, incremental sync. / 可扩展到 media 下载、导出、增量同步。 + +## 2. Module Map / 模块图 + +```mermaid +flowchart TD + A["Telegram command: .leech ..."] --> B["src/plugin/leech.ts"] + B --> C["LeechService"] + C --> D["resolveLeechTarget()"] + C --> E["safeGetMessages() / teleproto"] + C --> F["serializeLeechMessage()"] + C --> G["LeechDB"] + C --> H["StructuredLeechLogger"] + G --> I["assets/leech/leech.db"] + H --> J["console JSON log"] + H --> K["leech_actions table"] +``` + +## 3. File Responsibilities / 文件职责 + +| File / 文件 | Responsibility / 职责 | +|---|---| +| `src/plugin/leech.ts` | Telegram command entry, argument parsing, user feedback / Telegram 命令入口、参数解析、用户反馈 | +| `src/utils/leech/leechService.ts` | Core leech flow: resolve target, fetch in batches, save, update status / 核心抓取流程 | +| `src/utils/leech/leechDB.ts` | SQLite schema and CRUD operations / SQLite schema 与读写封装 | +| `src/utils/leech/structuredLogger.ts` | JSON structured log output + action persistence / 结构化日志输出 + 持久化 | +| `src/utils/leech/targetResolver.ts` | Target resolution: here, @username, numeric ID, t.me link / Target 解析 | +| `src/utils/leech/messageSerializer.ts` | Telegram Message -> SQLite row / 消息序列化为 SQLite 行 | +| `src/utils/leech/dateRange.ts` | Date range parsing / 日期范围解析 | +| `src/utils/leech/json.ts` | Safe JSON serialization, ID/number conversion / 安全 JSON 序列化 | +| `src/utils/leech/types.ts` | TypeScript type definitions / 类型定义 | +| `scripts/leech-smoke.ts` | Local smoke test (no Telegram connection) / 本地冒烟测试 | + +## 4. Data Flow / 数据流 + +1. User sends `.leech chat ...` in Telegram. / 用户在 Telegram 发送 `.leech chat ...`。 +2. `src/plugin/leech.ts` parses the command arguments. / 插件解析命令参数。 +3. `LeechService.runChatLeech()` creates a unique `actionId`. / 服务创建唯一 `actionId`。 +4. `resolveLeechTarget()` converts the input to a Telegram entity. / 将输入转换为 Telegram entity。 +5. `LeechDB.createJob()` inserts a job row. / 插入 job 行。 +6. `safeGetMessages()` fetches messages in batches. / 分批抓取消息。 +7. `serializeLeechMessage()` converts each message to a SQLite row. / 每条消息转换为 SQLite 行。 +8. `LeechDB.upsertMessage()` writes to `leech_messages`. / 写入 `leech_messages`。 +9. Every key action calls `StructuredLeechLogger.log()`: console JSON + `leech_actions` table. / 每个关键动作输出 JSON 日志并写入表。 +10. On completion, update `leech_jobs.status = completed`. / 完成后更新 job 状态。 + +## 5. Date Range Strategy / 日期范围策略 + +Telegram message history reads from newest to oldest by default. / Telegram 消息历史默认从新到旧读取。 + +Leech V1 uses: + +- `offsetDate = toTs + 1` (exclusive boundary / 排除边界) +- `offsetId = lastMessage.id` +- Loop until: `fromTs` boundary reached / no more messages / `--limit` reached / runtime aborted + +`--from YYYY-MM-DD` is converted to `00:00:00`. / 转换为当天 00:00:00。 +`--to YYYY-MM-DD` is converted to `23:59:59.999`. / 转换为当天 23:59:59.999。 + +## 6. Lifecycle / Reload Design / 生命周期与重载设计 + +TeleBox uses `GenerationContext` for each runtime generation. / TeleBox 使用 `GenerationContext` 管理每次 runtime 代次。 + +Leech V1 binds batch fetching to the current generation via `lifecycle.runTask(...)`, so a runtime reload/shutdown will trigger abort/drain and prevent stale tasks from hanging. + +Leech V1 通过 `lifecycle.runTask(...)` 绑定到当前 generation,runtime reload/shutdown 时会触发 abort/drain,避免旧任务悬挂。 + +## 7. SQLite Schema / 数据库结构 + +```mermaid +erDiagram + leech_jobs ||--o{ leech_messages : "first_job_id / last_job_id" + leech_jobs ||--o{ leech_actions : "job_id" + + leech_jobs { + integer id + text action_id + text target + text chat_id + integer from_ts + integer to_ts + text status + integer saved_count + integer scanned_count + } + + leech_messages { + text chat_id + integer message_id + integer date_ts + text date_iso + text sender_id + text message_text + text raw_json + text media_type + } + + leech_actions { + integer id + text action_id + integer job_id + text action + text status + text timestamp + text details_json + } +``` + +## 8. Future Extensions / 后续扩展 + +| Extension / 扩展 | Description / 说明 | +|---|---| +| Incremental sync / 增量同步 | Track last fetched message_id/date per chat / 记录每个 chat 上次抓取位置 | +| Media download / 媒体下载 | Download photo/document/video to `temp/leech_media` / 下载文件到本地 | +| Export / 导出 | `.leech export csv/json` | +| Search / 搜索 | `.leech search keyword` | +| Job queue / 任务队列 | Concurrency control and cancel support / 并发控制与取消 | + +## 9. Verification Strategy / 验证策略 + +```powershell +npx tsc --noEmit # TypeScript type check / 类型检查 +npm run leech:smoke # Local smoke test / 本地冒烟测试 +``` + +The smoke test uses a fake Telegram client to verify: / 冒烟测试使用 fake Telegram client 验证: + +- Date range pagination / 日期范围分页 +- SQLite job/message/action persistence / SQLite 持久化 +- Structured log output / 结构化日志 +- Plugin command entry points: `.leech login/chat/jobs/stats/db` / 插件命令入口 + +Real Telegram fetching requires a valid `config.json` session and testing `.leech chat ...` in Telegram. / 真实抓取需要有效 session 并在 Telegram 内测试。 diff --git a/LEECH_FEATURES.md b/LEECH_FEATURES.md new file mode 100644 index 0000000..97ed9c3 --- /dev/null +++ b/LEECH_FEATURES.md @@ -0,0 +1,326 @@ +# TeleBox Leech V1 功能解释 + +## 1. Login Telegram Session + +TeleBox 本身已经有 Telegram session 登录流程: + +- `src/utils/apiConfig.ts` 管理 `config.json` +- `src/utils/loginManager.ts` 管理 QR / 手机号登录 +- `src/utils/runtimeManager.ts` 创建并持有全局 `TelegramClient` + +Leech V1 不重新实现一套独立登录,而是复用 TeleBox runtime 的登录状态。 + +新增命令: + +```text +.leech login +.leech session +``` + +用途: + +- 检查当前 session 是否可用。 +- 输出当前登录用户 ID、username、name。 +- 记录 structured log:`session.check` + +## 2. Leech chat/group messages with date range + +命令: + +```text +.leech chat --from --to [--limit N] [--batch N] +``` + +Target 支持: + +| target | 说明 | +|---|---| +| `here` | 当前命令所在 chat/group/channel | +| `@username` | 公开群组/频道/用户 | +| `-100123456789` | Telegram supergroup/channel ID | +| `https://t.me/name` | t.me 公开链接 | +| `https://t.me/c/123456789/1` | t.me 私有群/频道消息链接,会转换成 `-100123456789` | + +日期: + +| 参数 | 说明 | +|---|---| +| `--from 2026-01-01` | 从 2026-01-01 00:00:00 开始 | +| `--to 2026-01-31` | 到 2026-01-31 23:59:59 结束 | + +抓取逻辑: + +1. 从 `--to` 附近开始向旧消息分页。 +2. 每批最多 100 条。 +3. 保存落在 `[from, to]` 内的消息。 +4. 如果消息时间早于 `from`,停止。 + +## 3. Save to local SQLite DB + +默认 DB 路径: + +```text +assets/leech/leech.db +``` + +查看命令: + +```text +.leech db +``` + +主要表: + +- `leech_jobs`:任务元数据。 +- `leech_messages`:消息数据。 +- `leech_actions`:结构化 action log。 + +`leech_messages` 使用 `(chat_id, message_id)` 作为 primary key,所以重复抓同一范围会 upsert,不会无限重复插入。 + +## 4. Structured log + +每个关键动作都会有 JSON log: + +- `command.` +- `session.check` +- `chat.leech.command` +- `chat.resolve_target` +- `chat.fetch_batch` +- `chat.save_batch` +- `chat.save_message`(跳过异常消息时) + +每条 log 同时写入: + +1. Console stdout +2. SQLite `leech_actions` + +这样后续可以直接用 SQL 回放任务: + +```sql +SELECT * +FROM leech_actions +WHERE action_id = 'leech_chat_xxx' +ORDER BY id ASC; +``` + +## 5. Commands + +### `.leech help` + +显示帮助。 + +### `.leech login` + +检查 session。 + +### `.leech chat` + +抓消息。 + +Examples: + +```text +.leech chat here --from 2026-01-01 --to 2026-01-31 +.leech chat @mygroup --from 2026-01-01 --to 2026-01-31 --limit 1000 +.leech chat https://t.me/c/123456789/1 --from 2026-01-01 --to 2026-01-02 +``` + +### `.leech jobs` + +查看最近任务。 + +```text +.leech jobs +.leech jobs 20 +``` + +### `.leech stats` + +查看保存统计。 + +### `.leech db` + +查看 SQLite DB 路径。 + +## 6. 代码注释风格 + +核心模块的关键逻辑使用中英文注释: + +```ts +// Telegram offsetDate is exclusive, so +1 second keeps the --to second inclusive. +// Telegram 的 offsetDate 是排除边界,因此 +1 秒来保证 --to 当秒被包含。 +``` + +这样方便后续中文沟通,也保留英文技术上下文。 + +## 7. 本地验证 + +```powershell +npm run leech:smoke +``` + +这个命令不会连接 Telegram,会使用 fake client 模拟 3 条消息,并跑两层验证: + +1. `LeechService` service 层。 +2. `src/plugin/leech.ts` 插件命令层。 + +- 2 条在日期范围内,应保存到 SQLite。 +- 1 条早于 `--from`,应触发边界停止。 + +验证点: + +- `leech_jobs` 有 1 条任务。 +- `leech_messages` 有 2 条消息。 +- `leech_actions` 有多条 structured action log。 +- `LeechService.stats()` 能读取统计结果。 +- 插件命令 `.leech login/chat/jobs/stats/db` 都能返回预期文本。 +# TeleBox Leech V1 Features / 功能解释 + +## 1. Login Telegram Session / 登录 Telegram Session + +TeleBox already has a Telegram session login flow built-in: + +TeleBox 本身已经有 Telegram session 登录流程: + +- `src/utils/apiConfig.ts` manages `config.json` / 管理 `config.json` +- `src/utils/loginManager.ts` handles QR / phone login / 处理 QR / 手机号登录 +- `src/utils/runtimeManager.ts` creates and holds the global `TelegramClient` / 创建并持有全局 `TelegramClient` + +Leech V1 does NOT implement a separate login. It reuses the TeleBox runtime login. + +Leech V1 不重新实现登录,而是复用 TeleBox runtime 的登录状态。 + +Commands / 命令: + +```text +.leech login +.leech session +``` + +Purpose / 用途: +- Check if the current session is valid. / 检查当前 session 是否可用。 +- Output current user ID, username, name. / 输出当前登录用户信息。 +- Structured log action: `session.check` / 结构化日志 action。 + +## 2. Leech Chat/Group Messages with Date Range / 按日期范围抓取消息 + +Command / 命令: + +```text +.leech chat --from --to [--limit N] [--batch N] +``` + +### Supported Targets / 支持的 Target + +| Target | Description / 说明 | +|---|---| +| `here` | Current chat/group/channel where the command is sent / 命令所在的当前 chat | +| `@username` | Public group/channel/user / 公开群组/频道/用户 | +| `-100123456789` | Telegram supergroup/channel ID | +| `https://t.me/name` | Public t.me link / 公开链接 | +| `https://t.me/c/123456789/1` | Private link, converted to `-100123456789` / 私有链接,自动转换 | + +### Date Parameters / 日期参数 + +| Param / 参数 | Description / 说明 | +|---|---| +| `--from 2026-01-01` | Start from 2026-01-01 00:00:00 / 从 2026-01-01 00:00:00 开始 | +| `--to 2026-01-31` | End at 2026-01-31 23:59:59 / 到 2026-01-31 23:59:59 结束 | + +### Fetch Logic / 抓取逻辑 + +1. Start from `--to` and paginate toward older messages. / 从 `--to` 附近开始向旧消息分页。 +2. Fetch in batches of up to 100. / 每批最多 100 条。 +3. Save messages within `[from, to]`. / 保存落在日期范围内的消息。 +4. Stop if a message is older than `from`. / 消息早于 `from` 时停止。 + +## 3. Save to Local SQLite DB / 保存到本地 SQLite 数据库 + +Default DB path / 默认路径: + +```text +assets/leech/leech.db +``` + +Check with / 查看:`.leech db` + +Tables / 表: +- `leech_jobs` — task metadata / 任务元数据 +- `leech_messages` — message data / 消息数据 +- `leech_actions` — structured action log / 结构化 action 日志 + +`leech_messages` uses `(chat_id, message_id)` as primary key, so re-fetching the same range will upsert (not duplicate). + +`leech_messages` 使用 `(chat_id, message_id)` 作为主键,重复抓取同一范围会 upsert,不会重复插入。 + +## 4. Structured Log / 结构化日志 + +Every key action produces a JSON log entry, written to both: + +每个关键动作都会生成 JSON 日志,同时写入: + +1. Console stdout +2. SQLite `leech_actions` table + +Key actions / 关键 action: +- `command.` — command entry / 命令入口 +- `session.check` — session verification / session 检查 +- `chat.leech.command` — main leech flow / 主抓取流程 +- `chat.resolve_target` — target resolution / target 解析 +- `chat.fetch_batch` — batch fetching / 分批抓取 +- `chat.save_batch` — batch saving / 分批保存 + +Query example / 查询示例: + +```sql +SELECT * FROM leech_actions +WHERE action_id = 'leech_chat_xxx' +ORDER BY id ASC; +``` + +## 5. All Commands / 全部命令 + +| Command / 命令 | Description / 说明 | +|---|---| +| `.leech help` | Show help / 显示帮助 | +| `.leech login` / `.leech session` | Check Telegram session / 检查 session | +| `.leech chat --from --to ` | Fetch messages / 抓取消息 | +| `.leech jobs [N]` | View recent jobs / 查看最近任务 | +| `.leech stats` | View SQLite stats / 查看保存统计 | +| `.leech db` | Show DB path / 查看数据库路径 | + +## 6. Code Comment Style / 代码注释风格 + +Core logic uses bilingual comments: + +核心逻辑使用中英文注释: + +```ts +// Telegram offsetDate is exclusive, so +1 second keeps the --to second inclusive. +// Telegram 的 offsetDate 是排除边界,因此 +1 秒来保证 --to 当秒被包含。 +``` + +This facilitates Chinese communication while preserving English technical context. + +这样方便中文沟通,同时保留英文技术上下文。 + +## 7. Local Verification / 本地验证 + +```powershell +npm run leech:smoke +``` + +This command does NOT connect to Telegram. It uses a fake client to simulate 3 messages: + +这个命令不会连接 Telegram,会使用 fake client 模拟 3 条消息: + +- 2 messages within the date range → should be saved. / 2 条在范围内 → 应保存。 +- 1 message before `--from` → should trigger boundary stop. / 1 条早于 `--from` → 触发边界停止。 + +Verification points / 验证点: + +- `leech_jobs` has 1 row / 有 1 条任务 +- `leech_messages` has 2 rows / 有 2 条消息 +- `leech_actions` has multiple structured logs / 有多条结构化日志 +- `LeechService.stats()` returns correct statistics / 返回正确统计 +- Plugin commands `.leech login/chat/jobs/stats/db` return expected output / 插件命令返回预期文本 diff --git a/LEECH_README.md b/LEECH_README.md new file mode 100644 index 0000000..50ea969 --- /dev/null +++ b/LEECH_README.md @@ -0,0 +1,363 @@ +# TeleBox Leech V1 README + +TeleBox Leech V1 是一个内置 Telegram Userbot 插件,用当前 TeleBox 登录的 Telegram session 抓取 chat/group/channel 的历史消息,并保存到本地 SQLite。 + +## 功能清单 + +1. Login Telegram Session + - TeleBox runtime 负责真实登录。 + - Leech 插件提供 `.leech login` / `.leech session` 检查当前 session。 +2. Leech chat / group messages with date range + - `.leech chat --from YYYY-MM-DD --to YYYY-MM-DD` + - 支持 `here`、`@username`、数字 ID、`https://t.me/...` 链接。 +3. Save to local SQLite DB + - 默认 DB:`assets/leech/leech.db` + - 表:`leech_jobs`、`leech_messages`、`leech_actions` +4. Structured log for every action + - 每个关键 action 都会输出 JSON log 到 console。 + - 同时写入 `leech_actions` 表,方便审计。 +5. Bilingual comments + - 核心代码包含中英文注释,方便后续二开。 + +## 快速开始 + +```powershell +cd "C:\Users\user\Desktop\TP AI Agent\TeleBox_reverse\TeleBox" +npm install +npm start +``` + +第一次启动会要求输入 Telegram `api_id`、`api_hash`,然后选择 QR login 或手机号登录。登录成功后 session 会保存到项目根目录 `config.json`。 + +> 不要提交真实 `config.json`,里面有 Telegram session。 + +## 本地 smoke 验证 + +不连接 Telegram,只验证 Leech V1 的 SQLite schema、structured log、日期范围分页、保存链路和插件命令入口: + +```powershell +npm run leech:smoke +``` + +成功时会输出: + +```text +leech smoke ok +``` + +并创建临时 SQLite DB: + +- `temp/leech-smoke.db`:service 层验证。 +- `temp/leech-plugin-smoke.db`:插件命令层验证,覆盖 `.leech login/chat/jobs/stats/db`。 + +## 命令 + +### 检查登录 session + +```text +.leech login +.leech session +``` + +输出当前 Telegram 用户 ID、username、name。 + +### 抓当前聊天 + +```text +.leech chat here --from 2026-01-01 --to 2026-01-31 +``` + +`here` 表示命令所在的当前 chat/group/channel。 + +### 抓指定群组/频道 + +```text +.leech chat @example_group --from 2026-01-01 --to 2026-01-31 --limit 500 --batch 100 +``` + +参数: + +- `--from`:开始日期,必填。 +- `--to`:结束日期,必填。 +- `--limit`:最多保存多少条消息,可选,不填表示按日期范围抓到边界。 +- `--batch`:每批抓取数量,1-100,默认 100。 + +### 查看任务 + +```text +.leech jobs +.leech jobs 20 +``` + +### 查看统计 + +```text +.leech stats +``` + +### 查看 DB 路径 + +```text +.leech db +``` + +## SQLite 表说明 + +### `leech_jobs` + +每一次 leech 任务的元数据: + +- target +- chat_id / chat_title / chat_type +- from_ts / to_ts +- status +- saved_count / scanned_count +- started_at / finished_at +- error + +### `leech_messages` + +保存每条 Telegram message: + +- chat_id + message_id +- date_ts / date_iso +- sender_id / sender_username / sender_name +- message_text +- media_type +- reply_to_msg_id +- views / forwards +- raw_json + +### `leech_actions` + +结构化 action log: + +- action_id +- job_id +- action +- status +- timestamp +- actor +- target +- details_json + +## Structured log 示例 + +```json +{ + "scope": "telebox.leech", + "timestamp": "2026-06-27T01:00:00.000Z", + "actionId": "leech_chat_1780000000000_abcd1234", + "jobId": 1, + "action": "chat.fetch_batch", + "status": "success", + "actor": "123456", + "target": "-100123456789", + "details": { + "batchNo": 1, + "received": 100 + } +} +``` + +## 注意事项 + +- 这是 userbot:默认只响应自己发出的命令消息。 +- Telegram API 有 rate limit;大范围抓取建议分段跑。 +- `assets/leech/leech.db` 属于本地数据,不要提交到 git。 +- 可用 `TB_LEECH_DB_PATH` 临时覆盖 DB 路径,方便测试或隔离环境。 +- 如果需要导出,可直接使用 SQLite 工具读取 DB。 +# TeleBox Leech V1 README / TeleBox Leech V1 说明文档 + +TeleBox Leech V1 is a built-in Telegram Userbot plugin that uses the current TeleBox session to fetch historical messages from any chat/group/channel within a date range, and saves them to a local SQLite database. + +TeleBox Leech V1 是一个内置 Telegram Userbot 插件,用当前 TeleBox 登录的 Telegram session 抓取 chat/group/channel 的历史消息,并保存到本地 SQLite。 + +## Features / 功能清单 + +1. **Login Telegram Session / 登录 Telegram Session** + - TeleBox runtime handles the actual Telegram login. + - TeleBox runtime 负责真正的 Telegram 登录。 + - The leech plugin provides `.leech login` / `.leech session` to check the current session status. + - Leech 插件提供 `.leech login` / `.leech session` 检查当前 session 状态。 + +2. **Leech chat / group messages with date range / 按日期范围抓取 chat/group 消息** + - `.leech chat --from YYYY-MM-DD --to YYYY-MM-DD` + - Supports `here`, `@username`, numeric ID, `https://t.me/...` links. + - 支持 `here`、`@username`、数字 ID、`https://t.me/...` 链接。 + +3. **Save to local SQLite DB / 保存到本地 SQLite 数据库** + - Default DB path: `assets/leech/leech.db` + - Tables: `leech_jobs`, `leech_messages`, `leech_actions` + - 默认数据库路径:`assets/leech/leech.db` + - 表:`leech_jobs`、`leech_messages`、`leech_actions` + +4. **Structured log for every action / 每个 action 都有结构化日志** + - Every key action outputs a JSON log to the console and writes to the `leech_actions` table. + - 每个关键 action 都会输出 JSON log 到 console,并写入 `leech_actions` 表。 + +5. **Bilingual code comments / 中英文代码注释** + - Core logic uses bilingual comments (English + Chinese) for easier maintenance. + - 核心逻辑使用中英文注释,方便后续维护。 + +## Quick Start / 快速开始 + +```powershell +cd "C:\Users\user\Desktop\TP AI Agent\TeleBox_reverse\TeleBox" +npm install +npm start +``` + +On first start, you will be prompted for Telegram `api_id` and `api_hash`, then choose QR login or phone login. The session is saved to `config.json` in the project root. + +第一次启动会要求输入 Telegram `api_id`、`api_hash`,然后选择 QR 登录或手机号登录。登录成功后 session 会保存到项目根目录 `config.json`。 + +> Do NOT commit real `config.json` — it contains your Telegram session. +> 不要提交真实 `config.json`,里面有你的 Telegram session。 + +## Local Smoke Test / 本地冒烟测试 + +Without connecting to Telegram, verifies the SQLite schema, structured log, date range pagination, save pipeline, and plugin command entry points: + +不连接 Telegram,只验证 SQLite schema、结构化日志、日期范围分页、保存链路和插件命令入口: + +```powershell +npm run leech:smoke +``` + +On success it outputs `leech service smoke ok` and `leech plugin smoke ok`, creating temporary SQLite DBs: + +成功时输出 `leech service smoke ok` 和 `leech plugin smoke ok`,并创建临时 SQLite DB: + +- `temp/leech-smoke.db` — service layer / 服务层验证 +- `temp/leech-plugin-smoke.db` — plugin command layer / 插件命令层验证 (covers `.leech login/chat/jobs/stats/db`) + +## Commands / 命令 + +### Check login session / 检查登录 session + +```text +.leech login +.leech session +``` + +Outputs the current Telegram user ID, username, and name. / 输出当前 Telegram 用户 ID、用户名和昵称。 + +### Fetch current chat / 抓取当前聊天 + +```text +.leech chat here --from 2026-01-01 --to 2026-01-31 +``` + +`here` means the chat/group/channel where the command is sent. / `here` 表示命令所在的当前 chat/group/channel。 + +### Fetch specific group/channel / 抓取指定群组/频道 + +```text +.leech chat @example_group --from 2026-01-01 --to 2026-01-31 --limit 500 --batch 100 +``` + +| Param / 参数 | Description / 说明 | +|---|---| +| `--from` | Start date (required) / 开始日期(必填) | +| `--to` | End date (required) / 结束日期(必填) | +| `--limit` | Max messages to save (optional) / 最多保存条数(可选) | +| `--batch` | Batch size per fetch, 1–100, default 100 / 每批抓取数量 | + +### View jobs / 查看任务 + +```text +.leech jobs +.leech jobs 20 +``` + +### View stats / 查看统计 + +```text +.leech stats +``` + +### View DB path / 查看数据库路径 + +```text +.leech db +``` + +## SQLite Table Schema / SQLite 表结构 + +### `leech_jobs` + +Metadata for each leech task. / 每次 leech 任务的元数据。 + +| Column | Type | Description / 说明 | +|---|---|---| +| id | INTEGER PK | Auto-increment job ID / 自增任务 ID | +| action_id | TEXT | Structured log action ID / 结构化日志 action ID | +| target | TEXT | User-supplied target / 用户传入的 target | +| chat_id | TEXT | Telegram chat ID | +| chat_title | TEXT | Chat title / 群名 | +| from_ts / to_ts | INTEGER | Date range (Unix seconds) / 日期范围(Unix 秒) | +| status | TEXT | `running` / `completed` / `failed` | +| saved_count | INTEGER | Messages saved / 已保存消息数 | +| scanned_count | INTEGER | Messages scanned / 已扫描消息数 | + +### `leech_messages` + +Saved Telegram messages. Uses `(chat_id, message_id)` as primary key for upsert. + +保存的 Telegram 消息。使用 `(chat_id, message_id)` 作为主键,支持 upsert。 + +| Column | Type | Description / 说明 | +|---|---|---| +| chat_id | TEXT | Telegram chat ID | +| message_id | INTEGER | Telegram message ID | +| date_ts / date_iso | INTEGER / TEXT | Message timestamp / 消息时间戳 | +| sender_id | TEXT | Sender user ID / 发送者 ID | +| sender_username | TEXT | Sender username / 发送者用户名 | +| sender_name | TEXT | Sender display name / 发送者昵称 | +| message_text | TEXT | Message body text / 消息正文 | +| media_type | TEXT | Media class name / 媒体类型 | +| raw_json | TEXT | Full message snapshot / 完整消息快照 | + +### `leech_actions` + +Structured action log for audit trail. / 结构化 action 日志,用于审计。 + +| Column | Type | Description / 说明 | +|---|---|---| +| action_id | TEXT | Unique action ID / 唯一 action ID | +| job_id | INTEGER | Associated job ID / 关联任务 ID | +| action | TEXT | Action name / 动作名称 | +| status | TEXT | `start` / `progress` / `success` / `error` / `skipped` | +| timestamp | TEXT | ISO 8601 timestamp / ISO 8601 时间戳 | +| details_json | TEXT | JSON payload / JSON 详情 | + +## Structured Log Example / 结构化日志示例 + +```json +{ + "scope": "telebox.leech", + "timestamp": "2026-06-27T01:00:00.000Z", + "actionId": "leech_chat_1780000000000_abcd1234", + "jobId": 1, + "action": "chat.fetch_batch", + "status": "success", + "actor": "123456", + "target": "-100123456789", + "details": { + "batchNo": 1, + "received": 100 + } +} +``` + +## Notes / 注意事项 + +- This is a userbot: it only responds to commands sent by the logged-in user by default. +- 这是 userbot:默认只响应登录用户自己发出的命令。 +- Telegram API has rate limits; for large date ranges, consider splitting into smaller segments. +- Telegram API 有速率限制;大范围抓取建议分段执行。 +- `assets/leech/leech.db` is local data — do NOT commit to git. +- `assets/leech/leech.db` 是本地数据,不要提交到 git。 +- Use `TB_LEECH_DB_PATH` env var to override DB path for testing or isolation. +- 可用 `TB_LEECH_DB_PATH` 环境变量临时覆盖数据库路径。 diff --git a/STRUCTURE.md b/STRUCTURE.md new file mode 100644 index 0000000..ee0a37b --- /dev/null +++ b/STRUCTURE.md @@ -0,0 +1,242 @@ +# TeleBox 项目结构说明 + +> 用途:快速看懂 TeleBox 的目录结构,方便后续逆向和二开。 +> 路径基准:`C:\Users\user\Desktop\TP AI Agent\TeleBox_reverse\TeleBox` +> 分支:`feature/TelegramUserbot/Leeching/V1Telebox` +> commit:`6902a35` + +## 1. 顶层结构 + +```text +TeleBox/ +|-- .env-sample # 环境变量样例 +|-- .npmrc # npm 配置 +|-- .nvmrc # Node 版本提示 +|-- CHANGELOG.md # 版本变更记录 +|-- INSTALL.md # 安装指南 +|-- LICENSE # LGPL-2.1-only +|-- README.md # 项目介绍 +|-- TELEBOX_DEVELOPMENT.md # 官方开发规范/插件开发文档 +|-- LEECH_README.md # Leech V1 使用说明 +|-- LEECH_ARCHITECTURE.md # Leech V1 架构设计 +|-- LEECH_FEATURES.md # Leech V1 功能解释 +|-- package.json # npm scripts + dependencies +|-- tsconfig.json # TS 配置,@utils/* alias +|-- scripts/ +| `-- run-tsx.cjs # 用 tsx 启动 TS 入口 +|-- src/ +| |-- index.ts # 程序入口 +| |-- hook/ # teleproto / Message patch +| |-- plugin/ # 内置插件 +| `-- utils/ # runtime、插件、DB、日志、格式化工具 +|-- plugins/ +| `-- moyu.ts # 用户插件示例 +|-- assets/ # 运行期数据目录 +|-- temp/ # 临时目录 +`-- node_modules/ # 依赖目录;仓库只跟踪 .gitkeep +``` + +## 2. npm scripts + +| script | 命令 | 用途 | +|---|---|---| +| `start` | `node scripts/run-tsx.cjs ./src/index.ts` | 正式启动 | +| `dev` | `NODE_ENV=development node scripts/run-tsx.cjs ./src/index.ts` | 开发模式启动 | +| `tpm` | `node scripts/run-tsx.cjs ./src/plugin/tpm.ts` | 单独运行 TPM 相关入口 | +| `leech:smoke` | `node scripts/run-tsx.cjs ./scripts/leech-smoke.ts` | 本地验证 Leech SQLite/structured log/date range 链路 | + +## 3. `src/` 结构 + +### 3.1 入口与 Hook + +| 文件 | 行数 | 说明 | +|---|---:|---| +| `src/index.ts` | 27 | 入口;加载 dotenv/logger/hook;注册全局异常;启动 runtime | +| `src/hook/listen.ts` | 40 | 可选 patch `Message.edit`,当前入口里未启用 `patchMsgEdit()` | +| `src/hook/patches/telegram.patch.ts` | 62 | patch HTMLParser;给 `Api.Message` 增加 `deleteWithDelay`、`safeDelete` | +| `src/hook/types/telegram.d.ts` | 38 | TypeScript module augmentation,声明新增 Message 方法 | +| `scripts/leech-smoke.ts` | 260 | 本地 smoke 测试,不连接 Telegram,覆盖 service 层和插件命令层 | + +### 3.2 `src/utils/` 核心工具 + +| 文件 | 行数 | 主要职责 | +|---|---:|---| +| `src/utils/runtimeManager.ts` | 401 | runtime 状态机、TelegramClient 创建、start/reload/shutdown、generation 管理 | +| `src/utils/generationContext.ts` | 550 | 生命周期容器;追踪 task/timer/listener/child-process;drain/dispose | +| `src/utils/pluginManager.ts` | 596 | 插件扫描、require cache 清理、命令解析、事件注册、cron 注册、reload 入口 | +| `src/utils/pluginBase.ts` | 99 | Plugin 抽象类、插件有效性校验 | +| `src/utils/globalClient.ts` | 9 | 从 runtimeManager 重导出 global client/generation API | +| `src/utils/apiConfig.ts` | 100 | `config.json` 读写、首次 API_ID/API_HASH 输入、保存 session | +| `src/utils/loginManager.ts` | 262 | Telegram 登录:已有 session、QR、手机号、2FA | +| `src/utils/logger.ts` | 398 | console 覆写、日志等级、GramJS 日志降级/限流 | +| `src/utils/cronManager.ts` | 105 | cron 任务注册/删除/清理,接入 GenerationContext | +| `src/utils/pathHelpers.ts` | 38 | 创建 `assets/*`、`temp/*` 子目录 | +| `src/utils/npm_install.ts` | 66 | npm install 包/项目依赖,清理 npm 环境变量后执行 | +| `src/utils/authGuards.ts` | 28 | AUTH_KEY_UNREGISTERED 判断与 safe auth/getMe 包装 | +| `src/utils/safeGetMessages.ts` | 51 | 安全获取消息/回复消息 | +| `src/utils/channelGapBreaker.ts` | 336 | 频道 gap 错误熔断/退避 | +| `src/utils/entityHelpers.ts` | 362 | Telegram entity 解析/重试/取消 | +| `src/utils/conversation.ts` | 277 | 对话等待/取消/超时封装 | +| `src/utils/aliasDB.ts` | 106 | alias SQLite 数据库 | +| `src/utils/sudoDB.ts` | 144 | sudo 用户/群 SQLite 数据库 | +| `src/utils/sureDB.ts` | 192 | sure 用户/群/消息 SQLite 数据库 | +| `src/utils/sendLogDB.ts` | 47 | sendlog 目标配置 SQLite 数据库 | +| `src/utils/telegramFormatter.ts` | 583 | Telegram HTML/实体格式化 | +| `src/utils/telegraphFormatter.ts` | 761 | Telegraph 页面节点格式化 | +| `src/utils/teleboxInfoHelper.ts` | 49 | 读取 app 名、git commit、config appName | +| `src/utils/tlRevive.ts` | 88 | TL/JSON 结构 revive 辅助 | +| `src/utils/banUtils.ts` | 285 | ban/kick 相关 Telegram 工具 | + +### 3.2.1 `src/utils/leech/` Leech V1 工具 + +| 文件 | 主要职责 | +|---|---| +| `src/utils/leech/types.ts` | Leech 类型定义 | +| `src/utils/leech/dateRange.ts` | 日期范围解析,`YYYY-MM-DD` 转 Telegram 秒级时间戳 | +| `src/utils/leech/json.ts` | 安全 JSON 序列化、ID/number 转换 | +| `src/utils/leech/leechDB.ts` | SQLite schema 与 jobs/messages/actions 读写 | +| `src/utils/leech/structuredLogger.ts` | JSON structured log 输出并写入 DB | +| `src/utils/leech/targetResolver.ts` | target 解析:here、@username、数字 ID、t.me link | +| `src/utils/leech/messageSerializer.ts` | Telegram Message 转 SQLite row | +| `src/utils/leech/leechService.ts` | 核心 leech 流程:resolve/fetch/save/log | + +### 3.3 `src/plugin/` 内置插件 + +| 文件 | 行数 | 命令/入口 | 说明 | +|---|---:|---|---| +| `src/plugin/help.ts` | 294 | `help`, `h` | 帮助系统 | +| `src/plugin/alias.ts` | 144 | `alias` | 命令别名 set/del/list | +| `src/plugin/prefix.ts` | 117 | `prefix` | 运行时前缀查看/设置 | +| `src/plugin/sudo.ts` | 282 | `sudo` | sudo 用户/群权限与消息监听代执行 | +| `src/plugin/sure.ts` | 405 | `sure` | sure 白名单/消息规则 | +| `src/plugin/re.ts` | 143 | `re` | 回复复读 | +| `src/plugin/debug.ts` | 754 | `id`, `entity` | 用户/群/频道/message link 解析与调试 | +| `src/plugin/ping.ts` | 467 | `ping` | Telegram API/ICMP/DC 延迟测试 | +| `src/plugin/exec.ts` | 169 | `exec` | 通过 Telegram 执行 shell 命令 | +| `src/plugin/status.ts` | 972 | `status`, `sysinfo` | 系统状态/运行状态 | +| `src/plugin/reload.ts` | 759 | `reload` | 插件/进程重载、内存监控 | +| `src/plugin/update.ts` | 144 | `update` | git 更新 + npm install | +| `src/plugin/tpm.ts` | 1439 | `tpm` | 远程插件安装/卸载/搜索 | +| `src/plugin/bf.ts` | 720 | `bf`, `hf` | 备份/恢复 plugins + assets | +| `src/plugin/sendLog.ts` | 257 | `sendlog`, `logs`, `log` | 发送日志文件 | +| `src/plugin/loglevel.ts` | 98 | `loglevel` | 调整日志等级 | +| `src/plugin/leech.ts` | 303 | `leech` | Leech V1:session 检查、按日期抓消息、保存 SQLite | + +### 3.4 `plugins/` 用户插件 + +| 文件 | 行数 | 命令 | 说明 | +|---|---:|---|---| +| `plugins/moyu.ts` | 51 | `moyu` | 从外部 API 下载图片,用 `CustomFile` 上传到 Telegram | + +## 4. 运行期目录约定 + +| 目录 | 用途 | 建议 | +|---|---|---| +| `assets/` | 长期数据、SQLite、JSON config、插件缓存 | 二开新增功能配置放 `assets//` | +| `temp/` | 临时下载/解压/生成文件 | leech 下载临时文件放 `temp/leech/` | +| `plugins/` | 用户插件 | 快速试验新功能先放这里 | +| `src/plugin/` | 内置插件 | 稳定功能再迁入这里 | +| `node_modules/` | npm dependencies | 不提交内容 | + +## 5. Tracked 文件清单 + +```text +.env-sample +.gitignore +.npmrc +.nvmrc +CHANGELOG.md +INSTALL.md +LEECH_ARCHITECTURE.md +LEECH_FEATURES.md +LEECH_README.md +LICENSE +README.md +TELEBOX_DEVELOPMENT.md +assets/.gitkeep +node_modules/.gitkeep +package.json +plugins/.gitkeep +plugins/moyu.ts +scripts/leech-smoke.ts +scripts/run-tsx.cjs +src/hook/listen.ts +src/hook/patches/telegram.patch.ts +src/hook/types/telegram.d.ts +src/index.ts +src/plugin/alias.ts +src/plugin/bf.ts +src/plugin/debug.ts +src/plugin/exec.ts +src/plugin/help.ts +src/plugin/leech.ts +src/plugin/loglevel.ts +src/plugin/ping.ts +src/plugin/prefix.ts +src/plugin/re.ts +src/plugin/reload.ts +src/plugin/sendLog.ts +src/plugin/status.ts +src/plugin/sudo.ts +src/plugin/sure.ts +src/plugin/tpm.ts +src/plugin/update.ts +src/utils/aliasDB.ts +src/utils/apiConfig.ts +src/utils/authGuards.ts +src/utils/banUtils.ts +src/utils/channelGapBreaker.ts +src/utils/conversation.ts +src/utils/cronManager.ts +src/utils/entityHelpers.ts +src/utils/generationContext.ts +src/utils/globalClient.ts +src/utils/leech/dateRange.ts +src/utils/leech/json.ts +src/utils/leech/leechDB.ts +src/utils/leech/leechService.ts +src/utils/leech/messageSerializer.ts +src/utils/leech/structuredLogger.ts +src/utils/leech/targetResolver.ts +src/utils/leech/types.ts +src/utils/logger.ts +src/utils/loginManager.ts +src/utils/npm_install.ts +src/utils/pathHelpers.ts +src/utils/pluginBase.ts +src/utils/pluginManager.ts +src/utils/runtimeManager.ts +src/utils/safeGetMessages.ts +src/utils/sendLogDB.ts +src/utils/sudoDB.ts +src/utils/sureDB.ts +src/utils/teleboxInfoHelper.ts +src/utils/telegramFormatter.ts +src/utils/telegraphFormatter.ts +src/utils/tlRevive.ts +telebox.png +temp/.gitkeep +tsconfig.json +``` + +## 6. 二开提交范围建议 + +建议在 `feature/TelegramUserbot/Leeching/V1Telebox` 中先只提交文档和自己的新增插件/工具,不要提交: + +- `config.json` +- `.env` +- `assets/**/*.db` +- `assets/**/config.json`(除非是样例) +- `temp/**` +- `node_modules/**` + +如果开始做 V1 leech,建议第一批文件: + +```text +plugins/leech.ts +plugins/leech/lib/leechTypes.ts +plugins/leech/lib/leechQueue.ts +plugins/leech/lib/leechDB.ts +plugins/leech/lib/httpDownloader.ts +plugins/leech/lib/telegramUploader.ts +``` diff --git a/package.json b/package.json index 8c3252e..058760d 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,8 @@ "scripts": { "start": "node scripts/run-tsx.cjs ./src/index.ts", "tpm": "node scripts/run-tsx.cjs ./src/plugin/tpm.ts", - "dev": "NODE_ENV=development node scripts/run-tsx.cjs ./src/index.ts" + "dev": "NODE_ENV=development node scripts/run-tsx.cjs ./src/index.ts", + "leech:smoke": "node scripts/run-tsx.cjs ./scripts/leech-smoke.ts" }, "repository": { "type": "git", diff --git a/scripts/leech-smoke.ts b/scripts/leech-smoke.ts new file mode 100644 index 0000000..01c0b3e --- /dev/null +++ b/scripts/leech-smoke.ts @@ -0,0 +1,260 @@ +import fs from "fs"; +import path from "path"; +import Database from "better-sqlite3"; +import { Api } from "teleproto"; +import { LeechDB } from "@utils/leech/leechDB"; +import { LeechService } from "@utils/leech/leechService"; +import { parseLeechDateRange } from "@utils/leech/dateRange"; + +type FakeMessage = { + id: number; + date: number; + message: string; + senderId: string; + chatId: string; + peerId: string; + sender?: { + username?: string; + firstName?: string; + lastName?: string; + }; +}; + +function unix(date: string): number { + return Math.floor(new Date(date).getTime() / 1000); +} + +function cleanupDb(dbPath: string): void { + for (const suffix of ["", "-wal", "-shm"]) { + try { + fs.unlinkSync(`${dbPath}${suffix}`); + } catch { + // ignore missing smoke DB files + } + } +} + +function assert(condition: unknown, message: string): asserts condition { + if (!condition) { + throw new Error(message); + } +} + +function createFakeMessages(): FakeMessage[] { + return [ + { + id: 3, + date: unix("2026-01-02T10:00:00.000Z"), + message: "inside range - day 2", + senderId: "42", + chatId: "-100999", + peerId: "-100999", + sender: { username: "alice", firstName: "Alice" }, + }, + { + id: 2, + date: unix("2026-01-01T10:00:00.000Z"), + message: "inside range - day 1", + senderId: "43", + chatId: "-100999", + peerId: "-100999", + sender: { username: "bob", firstName: "Bob" }, + }, + { + id: 1, + date: unix("2025-12-31T10:00:00.000Z"), + message: "outside range - old", + senderId: "44", + chatId: "-100999", + peerId: "-100999", + sender: { username: "carol", firstName: "Carol" }, + }, + ]; +} + +function createFakeClient(fakeMessages: FakeMessage[]) { + return { + async getMe() { + return new Api.User({ + id: BigInt(123456), + firstName: "Smoke", + lastName: "Tester", + username: "smoke_tester", + } as any); + }, + + async getEntity() { + return { + className: "Channel", + id: "999", + title: "Smoke Group", + username: "smoke_group", + broadcast: false, + }; + }, + + async getMessages(_entity: unknown, params: { limit?: number; offsetDate?: number; offsetId?: number }) { + const limit = params.limit ?? 100; + const offsetDate = params.offsetDate ?? Number.MAX_SAFE_INTEGER; + const offsetId = params.offsetId || Number.MAX_SAFE_INTEGER; + return fakeMessages + .filter((message) => message.date < offsetDate && message.id < offsetId) + .slice(0, limit); + }, + }; +} + +async function runServiceSmoke(): Promise { + const dbPath = path.join(process.cwd(), "temp", "leech-smoke.db"); + fs.mkdirSync(path.dirname(dbPath), { recursive: true }); + cleanupDb(dbPath); + + const fakeMessages = createFakeMessages(); + const fakeClient = createFakeClient(fakeMessages); + + // Service-layer smoke / 服务层 smoke:验证核心抓取、保存、日志。 + const db = new LeechDB(dbPath); + const service = new LeechService(db); + const result = await service.runChatLeech({ + client: fakeClient as any, + commandMessage: { + chatId: "-100999", + peerId: "-100999", + senderId: "1", + message: ".leech chat here --from 2026-01-01 --to 2026-01-02", + } as any, + options: { + targetInput: "here", + range: parseLeechDateRange("2026-01-01", "2026-01-02"), + batchSize: 2, + actor: "smoke", + }, + }); + + assert(result.savedCount === 2, `expected savedCount=2, got ${result.savedCount}`); + assert(result.scannedCount === 3, `expected scannedCount=3, got ${result.scannedCount}`); + assert(result.stoppedReason === "from_boundary_reached", `unexpected stop reason: ${result.stoppedReason}`); + + const stats = service.stats("-100999"); + assert(stats.totalMessages === 2, `expected totalMessages=2, got ${stats.totalMessages}`); + db.close(); + + const sqlite = new Database(dbPath, { readonly: true }); + try { + const actionCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_actions").get() as { n: number }).n; + const jobCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_jobs").get() as { n: number }).n; + const messageCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_messages").get() as { n: number }).n; + assert(actionCount >= 6, `expected >=6 structured action logs, got ${actionCount}`); + assert(jobCount === 1, `expected 1 job, got ${jobCount}`); + assert(messageCount === 2, `expected 2 saved messages, got ${messageCount}`); + console.log("leech service smoke ok", { + dbPath, + actionCount, + jobCount, + messageCount, + result, + }); + } finally { + sqlite.close(); + } +} + +function installFakeGlobalClient(fakeClient: unknown): void { + const moduleId = require.resolve("@utils/globalClient"); + require.cache[moduleId] = { + id: moduleId, + filename: moduleId, + loaded: true, + exports: { + getGlobalClient: async () => fakeClient, + tryGetCurrentGenerationContext: () => undefined, + getCurrentGeneration: () => 1, + getCurrentGenerationContext: () => { + throw new Error("smoke GenerationContext is not installed"); + }, + }, + } as NodeModule; +} + +function makeCommandMessage(text: string, edits: Array<{ text?: string; parseMode?: string }>) { + return { + id: 999, + chatId: "-100999", + peerId: "-100999", + senderId: "1", + message: text, + text, + out: true, + async edit(params: { text?: string; parseMode?: string }) { + edits.push(params); + return this; + }, + }; +} + +async function runPluginSmoke(): Promise { + const dbPath = path.join(process.cwd(), "temp", "leech-plugin-smoke.db"); + fs.mkdirSync(path.dirname(dbPath), { recursive: true }); + cleanupDb(dbPath); + process.env.TB_LEECH_DB_PATH = dbPath; + + const fakeClient = createFakeClient(createFakeMessages()); + installFakeGlobalClient(fakeClient); + + // Import after monkey-patching @utils/globalClient. + // 必须在 monkey patch 之后 import 插件,确保命令入口拿到 fake client。 + const plugin = require("../src/plugin/leech").default as { + cmdHandlers: Record Promise>; + }; + + const edits: Array<{ text?: string; parseMode?: string }> = []; + for (const command of [ + ".leech login", + ".leech chat here --from 2026-01-01 --to 2026-01-02 --batch 2", + ".leech jobs 5", + ".leech stats", + ".leech db", + ]) { + await plugin.cmdHandlers.leech(makeCommandMessage(command, edits)); + } + + assert(edits.some((edit) => edit.text?.includes("Telegram session OK")), "login command did not report session OK"); + assert(edits.some((edit) => edit.text?.includes("Leech completed")), "chat command did not complete"); + assert(edits.some((edit) => edit.text?.includes("Recent Leech Jobs")), "jobs command did not render jobs"); + assert(edits.some((edit) => edit.text?.includes("Leech SQLite Stats")), "stats command did not render stats"); + assert(edits.some((edit) => edit.text?.includes("Leech SQLite DB")), "db command did not render db path"); + + const sqlite = new Database(dbPath, { readonly: true }); + try { + const actionCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_actions").get() as { n: number }).n; + const jobCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_jobs").get() as { n: number }).n; + const messageCount = (sqlite.prepare("SELECT COUNT(*) AS n FROM leech_messages").get() as { n: number }).n; + assert(actionCount >= 12, `expected >=12 plugin action logs, got ${actionCount}`); + assert(jobCount === 1, `expected 1 plugin job, got ${jobCount}`); + assert(messageCount === 2, `expected 2 plugin messages, got ${messageCount}`); + console.log("leech plugin smoke ok", { + dbPath, + actionCount, + jobCount, + messageCount, + editCount: edits.length, + }); + } finally { + sqlite.close(); + delete process.env.TB_LEECH_DB_PATH; + } +} + +/** + * Local smoke test for SQLite + structured log + date range + plugin command logic. + * 本地 smoke 测试:不连接 Telegram,验证 SQLite、结构化日志、日期范围、插件命令入口。 + */ +async function main(): Promise { + await runServiceSmoke(); + await runPluginSmoke(); +} + +main().catch((error) => { + console.error("leech smoke failed", error); + process.exit(1); +}); diff --git a/src/plugin/leech.ts b/src/plugin/leech.ts new file mode 100644 index 0000000..0eee0b4 --- /dev/null +++ b/src/plugin/leech.ts @@ -0,0 +1,357 @@ +import { Api } from "teleproto"; +import { Plugin } from "@utils/pluginBase"; +import { getGlobalClient, tryGetCurrentGenerationContext } from "@utils/globalClient"; +import { getPrefixes } from "@utils/pluginManager"; +import { LeechDB } from "@utils/leech/leechDB"; +import { LeechService } from "@utils/leech/leechService"; +import { StructuredLeechLogger, createLeechActionId } from "@utils/leech/structuredLogger"; +import { parseLeechDateRange } from "@utils/leech/dateRange"; + +const mainPrefix = getPrefixes()[0] || "."; + +function htmlEscape(input: unknown): string { + return String(input ?? "") + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +function splitArgs(input: string): string[] { + const args: string[] = []; + const regex = /"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)'|(\S+)/g; + let match: RegExpExecArray | null; + while ((match = regex.exec(input))) { + args.push((match[1] ?? match[2] ?? match[3]).replace(/\\(["'])/g, "$1")); + } + return args; +} + +function parseFlags(args: string[]): { positional: string[]; flags: Record } { + const positional: string[] = []; + const flags: Record = {}; + const shortMap: Record = { + f: "from", + t: "to", + l: "limit", + b: "batch", + }; + + for (let i = 0; i < args.length; i += 1) { + const token = args[i]; + if (/^-?\d+$/.test(token)) { + positional.push(token); + continue; + } + + if (token.startsWith("--")) { + const raw = token.slice(2); + const [key, inlineValue] = raw.split(/=(.*)/s).filter((part) => part !== undefined); + if (inlineValue !== undefined) { + flags[key] = inlineValue; + } else if (args[i + 1] && !args[i + 1].startsWith("--")) { + flags[key] = args[i + 1]; + i += 1; + } else { + flags[key] = true; + } + continue; + } + + if (/^-[A-Za-z]$/.test(token)) { + const key = shortMap[token.slice(1)] || token.slice(1); + if (args[i + 1]) { + flags[key] = args[i + 1]; + i += 1; + } else { + flags[key] = true; + } + continue; + } + + positional.push(token); + } + + return { positional, flags }; +} + +function flagString(flags: Record, key: string): string | undefined { + const value = flags[key]; + return typeof value === "string" ? value : undefined; +} + +function flagNumber(flags: Record, key: string): number | undefined { + const value = flagString(flags, key); + if (!value) return undefined; + const parsed = Number(value); + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid --${key}: ${value}`); + } + return parsed; +} + +function clampBatch(input?: number): number { + if (!input) return 100; + return Math.max(1, Math.min(Math.floor(input), 100)); +} + +function actorFromMessage(msg: Api.Message): string { + return String((msg as any).senderId ?? (msg as any).chatId ?? "unknown"); +} + +const helpText = `TeleBox Leech V1 + +${mainPrefix}leech login / ${mainPrefix}leech session + 检查当前 Telegram session / Check current Telegram session. + +${mainPrefix}leech chat here --from 2026-01-01 --to 2026-01-31 + 抓取当前聊天日期范围内的消息并保存到 SQLite。 + +${mainPrefix}leech chat @username --from 2026-01-01 --to 2026-01-31 --limit 500 --batch 100 + 抓取指定 chat/group/channel 的消息。target 支持 @username、数字 ID、t.me 链接、here。 + +${mainPrefix}leech jobs [limit] + 查看最近任务。 + +${mainPrefix}leech stats + 查看 SQLite 保存统计。 + +${mainPrefix}leech db + 显示本地 SQLite DB 路径。 + +所有 action 会输出 JSON structured log,并写入 leech_actions 表。`; + +class LeechPlugin extends Plugin { + description = helpText; + + cmdHandlers: Record Promise> = { + leech: async (msg) => { + const args = splitArgs(msg.message.trim()); + const [, subRaw = "help", ...rest] = args; + const sub = subRaw.toLowerCase(); + + const db = new LeechDB(); + const logger = new StructuredLeechLogger(db); + const service = new LeechService(db, logger); + const actor = actorFromMessage(msg); + const commandActionId = createLeechActionId("leech_command"); + + try { + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "start", + actor, + target: (msg as any).chatId ? String((msg as any).chatId) : null, + details: { raw: msg.message }, + }); + + if (sub === "help" || sub === "h") { + await msg.edit({ text: helpText, parseMode: "html" }); + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "success", + actor, + }); + return; + } + + if (sub === "login" || sub === "session") { + await this.handleSession(msg, service, actor); + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "success", + actor, + }); + return; + } + + if (sub === "chat" || sub === "group" || sub === "messages") { + await this.handleChat(msg, service, rest, actor); + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "success", + actor, + }); + return; + } + + if (sub === "jobs") { + await this.handleJobs(msg, service, rest); + logger.log({ + actionId: commandActionId, + action: "command.jobs", + status: "success", + actor, + }); + return; + } + + if (sub === "stats") { + await this.handleStats(msg, service); + logger.log({ + actionId: commandActionId, + action: "command.stats", + status: "success", + actor, + }); + return; + } + + if (sub === "db") { + await msg.edit({ + text: `🗄️ Leech SQLite DB:\n${htmlEscape(service.dbPath)}`, + parseMode: "html", + }); + logger.log({ + actionId: commandActionId, + action: "command.db", + status: "success", + actor, + details: { dbPath: service.dbPath }, + }); + return; + } + + await msg.edit({ + text: `❌ Unknown leech subcommand: ${htmlEscape(sub)}\n\n${helpText}`, + parseMode: "html", + }); + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "skipped", + actor, + details: { reason: "unknown_subcommand" }, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.log({ + actionId: commandActionId, + action: `command.${sub}`, + status: "error", + actor, + details: { error: message }, + }); + await msg.edit({ + text: `❌ Leech error:\n${htmlEscape(message)}`, + parseMode: "html", + }); + } finally { + db.close(); + } + }, + }; + + private async handleSession(msg: Api.Message, service: LeechService, actor: string): Promise { + const client = await getGlobalClient(); + const me = await service.checkSession(client, actor); + await msg.edit({ + text: + `✅ Telegram session OK\n` + + `· ID: ${htmlEscape(me.id || "N/A")}\n` + + `· Username: ${htmlEscape(me.username || "N/A")}\n` + + `· Name: ${htmlEscape(me.name)}`, + parseMode: "html", + }); + } + + private async handleChat( + msg: Api.Message, + service: LeechService, + args: string[], + actor: string + ): Promise { + const { positional, flags } = parseFlags(args); + const targetInput = positional[0] || "here"; + const range = parseLeechDateRange(flagString(flags, "from"), flagString(flags, "to")); + const batchSize = clampBatch(flagNumber(flags, "batch")); + const limitRaw = flagNumber(flags, "limit"); + const limit = limitRaw && limitRaw > 0 ? Math.floor(limitRaw) : undefined; + const client = await getGlobalClient(); + const lifecycle = tryGetCurrentGenerationContext() ?? undefined; + + await msg.edit({ + text: + `⏳ Leech started\n` + + `· Target: ${htmlEscape(targetInput)}\n` + + `· Range: ${htmlEscape(range.label)}\n` + + `· Batch: ${batchSize}\n` + + `· Limit: ${limit ?? "unlimited"}`, + parseMode: "html", + }); + + const result = await service.runChatLeech({ + client, + commandMessage: msg, + lifecycle, + options: { + targetInput, + range, + batchSize, + limit, + actor, + }, + }); + + await msg.edit({ + text: + `✅ Leech completed\n` + + `· Job: ${result.jobId}\n` + + `· Chat: ${htmlEscape(result.chat.chatTitle)} (${htmlEscape(result.chat.chatId)})\n` + + `· Type: ${htmlEscape(result.chat.chatType)}\n` + + `· Saved: ${result.savedCount}\n` + + `· Scanned: ${result.scannedCount}\n` + + `· Stop: ${htmlEscape(result.stoppedReason)}\n` + + `· DB: ${htmlEscape(result.dbPath)}`, + parseMode: "html", + }); + } + + private async handleJobs(msg: Api.Message, service: LeechService, args: string[]): Promise { + const requestedLimit = Number(args[0] || 10); + const limit = Number.isFinite(requestedLimit) + ? Math.max(1, Math.min(requestedLimit, 20)) + : 10; + const jobs = service.listJobs(limit); + if (jobs.length === 0) { + await msg.edit({ text: "📭 No leech jobs yet." }); + return; + } + + const lines = jobs.map((job) => { + return [ + `#${job.id}`, + htmlEscape(job.status), + htmlEscape(job.chat_title || job.target), + `saved=${job.saved_count}`, + `range=${job.from_ts}-${job.to_ts}`, + ].join(" | "); + }); + + await msg.edit({ + text: `Recent Leech Jobs\n
${lines.join("\n")}
`, + parseMode: "html", + }); + } + + private async handleStats(msg: Api.Message, service: LeechService): Promise { + const stats = service.stats(); + await msg.edit({ + text: + `Leech SQLite Stats\n` + + `· Messages: ${stats.totalMessages}\n` + + `· Jobs: ${stats.totalJobs}\n` + + `· First message: ${htmlEscape(stats.firstMessageIso || "N/A")}\n` + + `· Last message: ${htmlEscape(stats.lastMessageIso || "N/A")}\n` + + `· Last job: ${htmlEscape(stats.lastJobStatus || "N/A")}\n` + + `· DB: ${htmlEscape(service.dbPath)}`, + parseMode: "html", + }); + } +} + +export default new LeechPlugin(); diff --git a/src/utils/leech/dateRange.ts b/src/utils/leech/dateRange.ts new file mode 100644 index 0000000..ff32885 --- /dev/null +++ b/src/utils/leech/dateRange.ts @@ -0,0 +1,65 @@ +import type { LeechDateRange } from "./types"; + +function pad(n: number): string { + return String(n).padStart(2, "0"); +} + +function formatLocal(date: Date): string { + return `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())} ${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`; +} + +function parseInputDate(input: string, endOfDay: boolean): Date { + const trimmed = input.trim(); + const dateOnly = trimmed.match(/^(\d{4})-(\d{2})-(\d{2})$/); + if (dateOnly) { + const [, y, m, d] = dateOnly; + return new Date( + Number(y), + Number(m) - 1, + Number(d), + endOfDay ? 23 : 0, + endOfDay ? 59 : 0, + endOfDay ? 59 : 0, + endOfDay ? 999 : 0 + ); + } + + const parsed = new Date(trimmed); + if (Number.isNaN(parsed.getTime())) { + throw new Error(`Invalid date: ${input}`); + } + return parsed; +} + +/** + * Parse user supplied range into Telegram seconds. + * 解析用户传入的日期范围,并转换成 Telegram 使用的秒级时间戳。 + */ +export function parseLeechDateRange(fromInput?: string, toInput?: string): LeechDateRange { + if (!fromInput || !toInput) { + throw new Error("Missing date range. Required: --from YYYY-MM-DD --to YYYY-MM-DD"); + } + + const from = parseInputDate(fromInput, false); + const to = parseInputDate(toInput, true); + if (from.getTime() > to.getTime()) { + throw new Error("--from must be earlier than or equal to --to"); + } + + const fromTs = Math.floor(from.getTime() / 1000); + const toTs = Math.floor(to.getTime() / 1000); + + return { + from, + to, + fromTs, + toTs, + label: `${formatLocal(from)} -> ${formatLocal(to)}`, + }; +} + +export function isoFromUnixSeconds(ts?: number | null): string | null { + if (typeof ts !== "number" || Number.isNaN(ts)) return null; + return new Date(ts * 1000).toISOString(); +} + diff --git a/src/utils/leech/json.ts b/src/utils/leech/json.ts new file mode 100644 index 0000000..98a460c --- /dev/null +++ b/src/utils/leech/json.ts @@ -0,0 +1,37 @@ +export function safeJsonStringify(value: unknown): string { + const seen = new WeakSet(); + return JSON.stringify( + value, + (key, val) => { + if (typeof val === "bigint") return val.toString(); + if (typeof val === "function") return undefined; + if (key === "client" || key === "_client") return undefined; + if (typeof val === "object" && val !== null) { + if (seen.has(val)) return "[Circular]"; + seen.add(val); + } + return val; + }, + 0 + ); +} + +export function toIdString(value: unknown): string | null { + if (value === undefined || value === null) return null; + try { + return String(value); + } catch { + return null; + } +} + +export function toNumber(value: unknown): number | null { + if (typeof value === "number") return Number.isFinite(value) ? value : null; + if (typeof value === "bigint") return Number(value); + if (typeof value === "string" && value.trim()) { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; + } + return null; +} + diff --git a/src/utils/leech/leechDB.ts b/src/utils/leech/leechDB.ts new file mode 100644 index 0000000..5f75826 --- /dev/null +++ b/src/utils/leech/leechDB.ts @@ -0,0 +1,278 @@ +import Database from "better-sqlite3"; +import path from "path"; +import { createDirectoryInAssets } from "@utils/pathHelpers"; +import { safeJsonStringify } from "./json"; +import type { + LeechActionLogInput, + LeechJobCreateInput, + LeechJobSummary, + LeechStats, + LeechStoredMessage, +} from "./types"; + +export class LeechDB { + private db: Database.Database; + readonly dbPath: string; + + constructor(dbPath: string = process.env.TB_LEECH_DB_PATH || path.join(createDirectoryInAssets("leech"), "leech.db")) { + this.dbPath = dbPath; + this.db = new Database(dbPath); + this.db.pragma("journal_mode = WAL"); + this.db.pragma("foreign_keys = ON"); + this.init(); + } + + private init(): void { + this.db.exec(` + CREATE TABLE IF NOT EXISTS leech_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + action_id TEXT NOT NULL, + target TEXT NOT NULL, + chat_id TEXT, + chat_title TEXT, + chat_type TEXT, + from_ts INTEGER NOT NULL, + to_ts INTEGER NOT NULL, + status TEXT NOT NULL, + requested_limit INTEGER, + batch_size INTEGER NOT NULL, + saved_count INTEGER NOT NULL DEFAULT 0, + scanned_count INTEGER NOT NULL DEFAULT 0, + started_at TEXT NOT NULL, + finished_at TEXT, + error TEXT, + options_json TEXT + ); + + CREATE TABLE IF NOT EXISTS leech_messages ( + chat_id TEXT NOT NULL, + message_id INTEGER NOT NULL, + first_job_id INTEGER NOT NULL, + last_job_id INTEGER NOT NULL, + date_ts INTEGER NOT NULL, + date_iso TEXT NOT NULL, + edit_date_ts INTEGER, + sender_id TEXT, + sender_username TEXT, + sender_name TEXT, + message_text TEXT, + raw_json TEXT NOT NULL, + media_type TEXT, + reply_to_msg_id INTEGER, + grouped_id TEXT, + views INTEGER, + forwards INTEGER, + is_out INTEGER NOT NULL DEFAULT 0, + saved_at TEXT NOT NULL, + PRIMARY KEY (chat_id, message_id) + ); + + CREATE INDEX IF NOT EXISTS idx_leech_messages_date + ON leech_messages(chat_id, date_ts); + + CREATE TABLE IF NOT EXISTS leech_actions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + action_id TEXT NOT NULL, + job_id INTEGER, + action TEXT NOT NULL, + status TEXT NOT NULL, + timestamp TEXT NOT NULL, + actor TEXT, + target TEXT, + details_json TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_leech_actions_action_id + ON leech_actions(action_id); + `); + } + + createJob(input: LeechJobCreateInput): number { + const now = new Date().toISOString(); + const info = this.db + .prepare( + `INSERT INTO leech_jobs ( + action_id, target, chat_id, chat_title, chat_type, + from_ts, to_ts, status, requested_limit, batch_size, + started_at, options_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, 'running', ?, ?, ?, ?)` + ) + .run( + input.actionId, + input.target, + input.chat?.chatId ?? null, + input.chat?.chatTitle ?? null, + input.chat?.chatType ?? null, + input.range.fromTs, + input.range.toTs, + input.limit ?? null, + input.batchSize, + now, + safeJsonStringify(input.options ?? {}) + ); + return Number(info.lastInsertRowid); + } + + updateJobChat(jobId: number, chat: LeechJobCreateInput["chat"]): void { + if (!chat) return; + this.db + .prepare( + `UPDATE leech_jobs + SET chat_id = ?, chat_title = ?, chat_type = ? + WHERE id = ?` + ) + .run(chat.chatId, chat.chatTitle, chat.chatType, jobId); + } + + updateJobProgress(jobId: number, savedCount: number, scannedCount: number): void { + this.db + .prepare( + `UPDATE leech_jobs + SET saved_count = ?, scanned_count = ? + WHERE id = ?` + ) + .run(savedCount, scannedCount, jobId); + } + + finishJob(jobId: number, savedCount: number, scannedCount: number): void { + this.db + .prepare( + `UPDATE leech_jobs + SET status = 'completed', saved_count = ?, scanned_count = ?, finished_at = ? + WHERE id = ?` + ) + .run(savedCount, scannedCount, new Date().toISOString(), jobId); + } + + failJob(jobId: number, error: unknown, savedCount = 0, scannedCount = 0): void { + const message = error instanceof Error ? error.stack || error.message : String(error); + this.db + .prepare( + `UPDATE leech_jobs + SET status = 'failed', saved_count = ?, scanned_count = ?, finished_at = ?, error = ? + WHERE id = ?` + ) + .run(savedCount, scannedCount, new Date().toISOString(), message, jobId); + } + + upsertMessage(message: LeechStoredMessage): void { + this.db + .prepare( + `INSERT INTO leech_messages ( + chat_id, message_id, first_job_id, last_job_id, date_ts, date_iso, + edit_date_ts, sender_id, sender_username, sender_name, message_text, + raw_json, media_type, reply_to_msg_id, grouped_id, views, forwards, + is_out, saved_at + ) VALUES ( + @chatId, @messageId, @firstJobId, @lastJobId, @dateTs, @dateIso, + @editDateTs, @senderId, @senderUsername, @senderName, @messageText, + @rawJson, @mediaType, @replyToMsgId, @groupedId, @views, @forwards, + @isOut, @savedAt + ) + ON CONFLICT(chat_id, message_id) DO UPDATE SET + last_job_id = excluded.last_job_id, + date_ts = excluded.date_ts, + date_iso = excluded.date_iso, + edit_date_ts = excluded.edit_date_ts, + sender_id = excluded.sender_id, + sender_username = excluded.sender_username, + sender_name = excluded.sender_name, + message_text = excluded.message_text, + raw_json = excluded.raw_json, + media_type = excluded.media_type, + reply_to_msg_id = excluded.reply_to_msg_id, + grouped_id = excluded.grouped_id, + views = excluded.views, + forwards = excluded.forwards, + is_out = excluded.is_out, + saved_at = excluded.saved_at` + ) + .run({ + ...message, + isOut: message.isOut ? 1 : 0, + }); + } + + recordAction(input: LeechActionLogInput): void { + this.db + .prepare( + `INSERT INTO leech_actions ( + action_id, job_id, action, status, timestamp, actor, target, details_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)` + ) + .run( + input.actionId, + input.jobId ?? null, + input.action, + input.status, + new Date().toISOString(), + input.actor ?? null, + input.target ?? null, + safeJsonStringify(input.details ?? {}) + ); + } + + listJobs(limit = 10): LeechJobSummary[] { + return this.db + .prepare( + `SELECT * + FROM leech_jobs + ORDER BY id DESC + LIMIT ?` + ) + .all(Math.max(1, Math.min(limit, 50))) as LeechJobSummary[]; + } + + stats(chatId?: string): LeechStats { + const messageRow = this.db + .prepare( + `SELECT + COUNT(*) AS totalMessages, + MIN(date_iso) AS firstMessageIso, + MAX(date_iso) AS lastMessageIso + FROM leech_messages + WHERE (? IS NULL OR chat_id = ?)` + ) + .get(chatId ?? null, chatId ?? null) as { + totalMessages: number; + firstMessageIso?: string | null; + lastMessageIso?: string | null; + }; + + const jobRow = this.db + .prepare( + `SELECT + COUNT(*) AS totalJobs, + (SELECT status FROM leech_jobs WHERE (? IS NULL OR chat_id = ?) ORDER BY id DESC LIMIT 1) AS lastJobStatus, + (SELECT finished_at FROM leech_jobs WHERE (? IS NULL OR chat_id = ?) ORDER BY id DESC LIMIT 1) AS lastJobFinishedAt + FROM leech_jobs + WHERE (? IS NULL OR chat_id = ?)` + ) + .get( + chatId ?? null, + chatId ?? null, + chatId ?? null, + chatId ?? null, + chatId ?? null, + chatId ?? null + ) as { + totalJobs: number; + lastJobStatus?: string | null; + lastJobFinishedAt?: string | null; + }; + + return { + chatId, + totalMessages: Number(messageRow.totalMessages || 0), + firstMessageIso: messageRow.firstMessageIso ?? null, + lastMessageIso: messageRow.lastMessageIso ?? null, + totalJobs: Number(jobRow.totalJobs || 0), + lastJobStatus: jobRow.lastJobStatus ?? null, + lastJobFinishedAt: jobRow.lastJobFinishedAt ?? null, + }; + } + + close(): void { + this.db.close(); + } +} diff --git a/src/utils/leech/leechService.ts b/src/utils/leech/leechService.ts new file mode 100644 index 0000000..47e3d55 --- /dev/null +++ b/src/utils/leech/leechService.ts @@ -0,0 +1,334 @@ +import { Api, TelegramClient } from "teleproto"; +import type { GenerationContext } from "@utils/generationContext"; +import { safeGetMessages } from "@utils/safeGetMessages"; +import { safeGetMe } from "@utils/authGuards"; +import { LeechDB } from "./leechDB"; +import { serializeLeechMessage } from "./messageSerializer"; +import { StructuredLeechLogger, createLeechActionId } from "./structuredLogger"; +import { resolveLeechTarget } from "./targetResolver"; +import type { LeechChatIdentity, LeechJobSummary, LeechRunOptions, LeechStats } from "./types"; + +export interface LeechRunResult { + actionId: string; + jobId: number; + chat: LeechChatIdentity; + savedCount: number; + scannedCount: number; + dbPath: string; + stoppedReason: string; +} + +function abortError(reason?: unknown): Error { + if (reason instanceof Error) return reason; + if (typeof reason === "string") return new Error(reason); + return new Error("Leech operation aborted"); +} + +function throwIfAborted(lifecycle?: GenerationContext): void { + if (lifecycle?.signal.aborted) { + throw abortError(lifecycle.signal.reason); + } +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +export class LeechService { + constructor( + private readonly db: LeechDB, + private readonly logger: StructuredLeechLogger = new StructuredLeechLogger(db) + ) {} + + /** + * Check current Telegram session. The actual login is owned by TeleBox runtime. + * 检查当前 Telegram Session;真正的登录流程由 TeleBox runtime 负责。 + */ + async checkSession(client: TelegramClient, actor?: string): Promise<{ id?: string; username?: string; name: string }> { + const actionId = createLeechActionId("leech_session"); + this.logger.log({ + actionId, + action: "session.check", + status: "start", + actor, + }); + + const me = await safeGetMe(client); + const name = [me?.firstName, me?.lastName].filter(Boolean).join(" ") || me?.username || "unknown"; + + this.logger.log({ + actionId, + action: "session.check", + status: "success", + actor, + details: { + id: me?.id ? String(me.id) : null, + username: me?.username ? `@${me.username}` : null, + name, + }, + }); + + return { + id: me?.id ? String(me.id) : undefined, + username: me?.username ? `@${me.username}` : undefined, + name, + }; + } + + async runChatLeech(params: { + client: TelegramClient; + commandMessage: Api.Message; + options: LeechRunOptions; + lifecycle?: GenerationContext; + }): Promise { + const actionId = createLeechActionId("leech_chat"); + const { client, commandMessage, options, lifecycle } = params; + let jobId = 0; + let savedCount = 0; + let scannedCount = 0; + let chat: LeechChatIdentity | undefined; + let stoppedReason = "completed"; + + this.logger.log({ + actionId, + action: "chat.leech.command", + status: "start", + actor: options.actor, + target: options.targetInput, + details: { + range: options.range.label, + fromTs: options.range.fromTs, + toTs: options.range.toTs, + batchSize: options.batchSize, + limit: options.limit ?? null, + }, + }); + + try { + throwIfAborted(lifecycle); + const resolved = await resolveLeechTarget({ + client, + commandMessage, + targetInput: options.targetInput, + }); + chat = resolved.identity; + + jobId = this.db.createJob({ + actionId, + target: options.targetInput, + chat, + range: options.range, + batchSize: options.batchSize, + limit: options.limit, + options: { + actor: options.actor, + targetInput: options.targetInput, + }, + }); + + this.logger.log({ + actionId, + jobId, + action: "chat.resolve_target", + status: "success", + actor: options.actor, + target: chat.chatId, + details: { ...chat }, + }); + + let offsetId = 0; + // Telegram offsetDate is exclusive, so +1 second keeps the --to second inclusive. + // Telegram 的 offsetDate 是排除边界,因此 +1 秒来保证 --to 当秒被包含。 + let offsetDate = options.range.toTs + 1; + let batchNo = 0; + + while (true) { + throwIfAborted(lifecycle); + if (options.limit && savedCount >= options.limit) { + stoppedReason = "limit_reached"; + break; + } + + batchNo += 1; + const fetchLimit = options.limit + ? Math.max(1, Math.min(options.batchSize, options.limit - savedCount)) + : options.batchSize; + + this.logger.log({ + actionId, + jobId, + action: "chat.fetch_batch", + status: "start", + actor: options.actor, + target: chat.chatId, + details: { + batchNo, + offsetId, + offsetDate, + fetchLimit, + }, + }); + + const messages = await (lifecycle + ? lifecycle.runTask( + async () => + await safeGetMessages(client, resolved.entity, { + limit: fetchLimit, + offsetId, + offsetDate, + }), + { label: `leech:fetch:${chat.chatId}:batch-${batchNo}` } + ) + : safeGetMessages(client, resolved.entity, { + limit: fetchLimit, + offsetId, + offsetDate, + })); + + const batchMessages = messages.filter((message): message is Api.Message => { + return !!message && typeof (message as any).id === "number"; + }); + + this.logger.log({ + actionId, + jobId, + action: "chat.fetch_batch", + status: "success", + actor: options.actor, + target: chat.chatId, + details: { + batchNo, + received: batchMessages.length, + }, + }); + + if (batchMessages.length === 0) { + stoppedReason = "no_more_messages"; + break; + } + + let reachedFromBoundary = false; + for (const message of batchMessages) { + const dateTs = Number((message as any).date || 0); + scannedCount += 1; + + if (dateTs < options.range.fromTs) { + reachedFromBoundary = true; + continue; + } + if (dateTs > options.range.toTs) { + continue; + } + + const row = serializeLeechMessage(message, chat, jobId); + if (!row) { + this.logger.log({ + actionId, + jobId, + action: "chat.save_message", + status: "skipped", + actor: options.actor, + target: chat.chatId, + details: { + messageId: (message as any).id, + reason: "serialize_failed", + }, + }); + continue; + } + + this.db.upsertMessage(row); + savedCount += 1; + + if (options.limit && savedCount >= options.limit) { + stoppedReason = "limit_reached"; + break; + } + } + + this.db.updateJobProgress(jobId, savedCount, scannedCount); + this.logger.log({ + actionId, + jobId, + action: "chat.save_batch", + status: "progress", + actor: options.actor, + target: chat.chatId, + details: { + batchNo, + savedCount, + scannedCount, + }, + }); + + if (reachedFromBoundary) { + stoppedReason = "from_boundary_reached"; + break; + } + + const last = batchMessages[batchMessages.length - 1] as any; + offsetId = Number(last.id); + offsetDate = Number(last.date || 0); + + if (batchMessages.length < fetchLimit) { + stoppedReason = "short_batch"; + break; + } + } + + this.db.finishJob(jobId, savedCount, scannedCount); + this.logger.log({ + actionId, + jobId, + action: "chat.leech.command", + status: "success", + actor: options.actor, + target: chat.chatId, + details: { + savedCount, + scannedCount, + stoppedReason, + dbPath: this.db.dbPath, + }, + }); + + return { + actionId, + jobId, + chat, + savedCount, + scannedCount, + dbPath: this.db.dbPath, + stoppedReason, + }; + } catch (error) { + if (jobId) this.db.failJob(jobId, error, savedCount, scannedCount); + this.logger.log({ + actionId, + jobId: jobId || null, + action: "chat.leech.command", + status: "error", + actor: options.actor, + target: chat?.chatId ?? options.targetInput, + details: { + error: errorMessage(error), + savedCount, + scannedCount, + }, + }); + throw error; + } + } + + listJobs(limit?: number): LeechJobSummary[] { + return this.db.listJobs(limit); + } + + stats(chatId?: string): LeechStats { + return this.db.stats(chatId); + } + + get dbPath(): string { + return this.db.dbPath; + } +} diff --git a/src/utils/leech/messageSerializer.ts b/src/utils/leech/messageSerializer.ts new file mode 100644 index 0000000..cf2a895 --- /dev/null +++ b/src/utils/leech/messageSerializer.ts @@ -0,0 +1,90 @@ +import { Api } from "teleproto"; +import { isoFromUnixSeconds } from "./dateRange"; +import { safeJsonStringify, toIdString, toNumber } from "./json"; +import type { LeechChatIdentity, LeechStoredMessage } from "./types"; + +function getClassName(value: unknown): string | null { + if (!value || typeof value !== "object") return null; + return (value as { className?: string }).className ?? value.constructor?.name ?? null; +} + +function inferMediaType(msg: Api.Message): string | null { + const anyMsg = msg as any; + if (!anyMsg.media) return null; + return getClassName(anyMsg.media); +} + +function getSenderName(sender: any): string | null { + if (!sender) return null; + if (sender.title) return sender.title; + const parts = [sender.firstName, sender.lastName].filter(Boolean); + return parts.length ? parts.join(" ") : null; +} + +function buildRawMessageSnapshot(msg: Api.Message): Record { + const anyMsg = msg as any; + return { + className: anyMsg.className, + id: anyMsg.id, + date: anyMsg.date, + editDate: anyMsg.editDate, + message: anyMsg.message, + senderId: toIdString(anyMsg.senderId), + chatId: toIdString(anyMsg.chatId), + peerId: toIdString(anyMsg.peerId), + replyTo: anyMsg.replyTo, + fwdFrom: anyMsg.fwdFrom, + mediaClassName: getClassName(anyMsg.media), + entities: anyMsg.entities, + groupedId: toIdString(anyMsg.groupedId), + postAuthor: anyMsg.postAuthor, + views: anyMsg.views, + forwards: anyMsg.forwards, + out: Boolean(anyMsg.out), + mentioned: Boolean(anyMsg.mentioned), + post: Boolean(anyMsg.post), + }; +} + +/** + * Convert a Telegram message into a stable SQLite row. + * 将 Telegram 消息转换为稳定的 SQLite 行,避免把 client/circular object 存入 DB。 + */ +export function serializeLeechMessage( + msg: Api.Message, + chat: LeechChatIdentity, + jobId: number +): LeechStoredMessage | null { + const anyMsg = msg as any; + const messageId = toNumber(anyMsg.id); + const dateTs = toNumber(anyMsg.date); + if (!messageId || !dateTs) return null; + + const sender = anyMsg.sender; + const replyToMsgId = toNumber(anyMsg.replyTo?.replyToMsgId ?? anyMsg.replyToMsgId); + const editDateTs = toNumber(anyMsg.editDate); + const dateIso = isoFromUnixSeconds(dateTs) ?? new Date(dateTs * 1000).toISOString(); + + return { + chatId: chat.chatId, + messageId, + firstJobId: jobId, + lastJobId: jobId, + dateTs, + dateIso, + editDateTs, + senderId: toIdString(anyMsg.senderId), + senderUsername: sender?.username ?? null, + senderName: getSenderName(sender), + messageText: typeof anyMsg.message === "string" ? anyMsg.message : null, + rawJson: safeJsonStringify(buildRawMessageSnapshot(msg)), + mediaType: inferMediaType(msg), + replyToMsgId, + groupedId: toIdString(anyMsg.groupedId), + views: toNumber(anyMsg.views), + forwards: toNumber(anyMsg.forwards), + isOut: Boolean(anyMsg.out), + savedAt: new Date().toISOString(), + }; +} + diff --git a/src/utils/leech/structuredLogger.ts b/src/utils/leech/structuredLogger.ts new file mode 100644 index 0000000..e1b0d76 --- /dev/null +++ b/src/utils/leech/structuredLogger.ts @@ -0,0 +1,39 @@ +import { randomUUID } from "crypto"; +import type { LeechActionLogInput } from "./types"; +import { safeJsonStringify } from "./json"; +import type { LeechDB } from "./leechDB"; + +export function createLeechActionId(prefix = "leech"): string { + return `${prefix}_${Date.now()}_${randomUUID().slice(0, 8)}`; +} + +export class StructuredLeechLogger { + constructor(private readonly db: LeechDB) {} + + /** + * Emit one machine-readable log line and persist it to SQLite. + * 输出一行可机器解析的 JSON 日志,并同步写入 SQLite,方便后续审计/回放。 + */ + log(input: LeechActionLogInput): void { + const payload = { + scope: "telebox.leech", + timestamp: new Date().toISOString(), + actionId: input.actionId, + jobId: input.jobId ?? null, + action: input.action, + status: input.status, + actor: input.actor ?? null, + target: input.target ?? null, + details: input.details ?? {}, + }; + + console.log(safeJsonStringify(payload)); + + try { + this.db.recordAction(input); + } catch (error) { + console.error("[LeechStructuredLogger] Failed to persist action log:", error); + } + } +} + diff --git a/src/utils/leech/targetResolver.ts b/src/utils/leech/targetResolver.ts new file mode 100644 index 0000000..f79e793 --- /dev/null +++ b/src/utils/leech/targetResolver.ts @@ -0,0 +1,80 @@ +import { Api, TelegramClient } from "teleproto"; +import { toIdString } from "./json"; +import type { LeechChatIdentity } from "./types"; + +function normalizeTelegramLink(input: string): string { + const trimmed = input.trim(); + const match = trimmed.match(/^https?:\/\/t\.me\/(.+)$/i); + if (!match) return trimmed; + + const path = match[1].replace(/[?#].*$/, ""); + const parts = path.split("/").filter(Boolean); + if (parts[0] === "c" && parts[1]) { + return `-100${parts[1]}`; + } + if (parts[0]) { + return parts[0].startsWith("@") ? parts[0] : `@${parts[0]}`; + } + return trimmed; +} + +function fullChatId(entity: any): string { + const raw = toIdString(entity?.id) ?? "unknown"; + if (entity?.className === "Channel") { + return raw.startsWith("-100") ? raw : `-100${raw.replace(/^-100/, "")}`; + } + if (entity?.className === "Chat" || entity?.className === "ChatForbidden") { + return raw.startsWith("-") ? raw : `-${raw}`; + } + return raw; +} + +function chatTitle(entity: any): string { + if (entity?.title) return entity.title; + const parts = [entity?.firstName, entity?.lastName].filter(Boolean); + if (parts.length) return parts.join(" "); + if (entity?.username) return `@${entity.username}`; + return entity?.className || "unknown"; +} + +function chatType(entity: any): string { + if (entity?.className === "Channel") { + return entity.broadcast ? "channel" : "supergroup"; + } + if (entity?.className === "Chat" || entity?.className === "ChatForbidden") { + return "group"; + } + if (entity?.className === "User") return entity.bot ? "bot" : "user"; + return entity?.className || "unknown"; +} + +export async function resolveLeechTarget(params: { + client: TelegramClient; + commandMessage: Api.Message; + targetInput?: string; +}): Promise<{ entity: any; identity: LeechChatIdentity }> { + const targetInput = params.targetInput?.trim(); + const hereAliases = new Set(["", "here", "current", "this", "当前", "本群"]); + + let entityLike: any; + if (!targetInput || hereAliases.has(targetInput.toLowerCase())) { + entityLike = (params.commandMessage as any).chatId ?? params.commandMessage.peerId; + } else { + const normalized = normalizeTelegramLink(targetInput); + entityLike = /^-?\d+$/.test(normalized) || normalized.startsWith("@") + ? normalized + : `@${normalized}`; + } + + const entity = await params.client.getEntity(entityLike); + const entityAny = entity as any; + const identity: LeechChatIdentity = { + input: targetInput || "here", + chatId: fullChatId(entityAny), + chatTitle: chatTitle(entityAny), + chatType: chatType(entityAny), + username: entityAny?.username ? `@${entityAny.username}` : undefined, + }; + + return { entity, identity }; +} diff --git a/src/utils/leech/types.ts b/src/utils/leech/types.ts new file mode 100644 index 0000000..1522bc0 --- /dev/null +++ b/src/utils/leech/types.ts @@ -0,0 +1,102 @@ +export type LeechActionStatus = + | "start" + | "progress" + | "success" + | "error" + | "skipped"; + +export interface LeechDateRange { + from: Date; + to: Date; + fromTs: number; + toTs: number; + label: string; +} + +export interface LeechChatIdentity { + input: string; + chatId: string; + chatTitle: string; + chatType: string; + username?: string; +} + +export interface LeechRunOptions { + targetInput: string; + range: LeechDateRange; + batchSize: number; + limit?: number; + actor?: string; +} + +export interface LeechJobCreateInput { + actionId: string; + target: string; + chat?: LeechChatIdentity; + range: LeechDateRange; + batchSize: number; + limit?: number; + options?: Record; +} + +export interface LeechStoredMessage { + chatId: string; + messageId: number; + firstJobId: number; + lastJobId: number; + dateTs: number; + dateIso: string; + editDateTs?: number | null; + senderId?: string | null; + senderUsername?: string | null; + senderName?: string | null; + messageText?: string | null; + rawJson: string; + mediaType?: string | null; + replyToMsgId?: number | null; + groupedId?: string | null; + views?: number | null; + forwards?: number | null; + isOut: boolean; + savedAt: string; +} + +export interface LeechActionLogInput { + actionId: string; + jobId?: number | null; + action: string; + status: LeechActionStatus; + actor?: string | null; + target?: string | null; + details?: Record; +} + +export interface LeechJobSummary { + id: number; + action_id: string; + target: string; + chat_id?: string | null; + chat_title?: string | null; + chat_type?: string | null; + from_ts: number; + to_ts: number; + status: string; + requested_limit?: number | null; + batch_size: number; + saved_count: number; + scanned_count: number; + started_at: string; + finished_at?: string | null; + error?: string | null; +} + +export interface LeechStats { + chatId?: string; + totalMessages: number; + firstMessageIso?: string | null; + lastMessageIso?: string | null; + totalJobs: number; + lastJobStatus?: string | null; + lastJobFinishedAt?: string | null; +} +