From fa7dd5b7ea2d54b084256305017f69a5219baf85 Mon Sep 17 00:00:00 2001 From: Feiyang Liu Date: Fri, 29 May 2026 22:55:55 +0800 Subject: [PATCH] Add forwarded chat record expansion Co-authored-by: Feiyang Liu Co-authored-by: Codex --- README.md | 22 +++- README_CN.md | 22 +++- wechat_cli/commands/records.py | 132 +++++++++++++++++++ wechat_cli/core/messages.py | 226 +++++++++++++++++++++++++++++++++ wechat_cli/main.py | 3 + 5 files changed, 401 insertions(+), 4 deletions(-) create mode 100644 wechat_cli/commands/records.py diff --git a/README.md b/README.md index 1d877d2..002e76e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Platform](https://img.shields.io/badge/platform-macOS%20%7C%20Windows%20%7C%20Linux-lightgrey.svg)](https://github.com/freestylefly/wechat-cli) -Chat history · Contacts · Sessions · Favorites · Statistics · Export +Chat history · Forwarded records · Contacts · Sessions · Favorites · Statistics · Export [中文文档](README_CN.md) @@ -19,7 +19,7 @@ Chat history · Contacts · Sessions · Favorites · Statistics · Export ## ✨ Highlights - **🚀 Zero-config install** — `npm install -g` and you're done, no Python needed -- **📦 11 commands** — sessions, history, search, contacts, members, stats, export, favorites, unread, new-messages, init +- **📦 12 commands** — sessions, history, records, search, contacts, members, stats, export, favorites, unread, new-messages, init - **🤖 AI-first** — JSON output by default, designed for LLM agent tool calls - **🔒 Fully local** — on-the-fly SQLCipher decryption, data never leaves your machine - **📊 Rich analytics** — top senders, message type breakdown, 24-hour activity charts @@ -153,6 +153,7 @@ EOF ```bash wechat-cli sessions # Recent chats wechat-cli history "Alice" --limit 20 # Chat messages +wechat-cli records "Alice" --format text # Expand forwarded chat records wechat-cli search "deadline" --chat "Team" # Search messages ``` @@ -174,6 +175,7 @@ You can use `wechat-cli` to query my local WeChat data. Common commands: - `wechat-cli sessions --limit 10` — list recent chats - `wechat-cli history "NAME" --limit 20 --format text` — read chat history +- `wechat-cli records "NAME" --limit 5 --format text` — expand forwarded/merged chat records - `wechat-cli search "KEYWORD" --chat "CHAT_NAME"` — search messages - `wechat-cli contacts --query "NAME"` — search contacts - `wechat-cli unread` — show unread sessions @@ -198,6 +200,9 @@ wechat-cli sessions --limit 5 # Read specific chat wechat-cli history "Alice" --limit 30 --format text +# Expand forwarded/merged chat records +wechat-cli records "Alice" --limit 5 --format text + # Search with filters wechat-cli search "report" --type file --limit 10 @@ -229,6 +234,19 @@ wechat-cli history "Alice" --format text **Options:** `--limit`, `--offset`, `--start-time`, `--end-time`, `--type`, `--format` +### `records` — Forwarded Chat Records + +Expand WeChat "forwarded/merged chat record" cards into their inner messages. +Nested forwarded records are expanded automatically. + +```bash +wechat-cli records "Alice" # Last 20 forwarded records +wechat-cli records "Alice" --limit 5 --format text +wechat-cli records "Team" --start-time "2026-04-01" --output records.json +``` + +**Options:** `--limit`, `--offset`, `--scan-limit`, `--start-time`, `--end-time`, `--format`, `--output` + ### `search` — Search Messages ```bash diff --git a/README_CN.md b/README_CN.md index 6483703..b888865 100644 --- a/README_CN.md +++ b/README_CN.md @@ -8,7 +8,7 @@ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Platform](https://img.shields.io/badge/platform-macOS%20%7C%20Windows%20%7C%20Linux-lightgrey.svg)](https://github.com/freestylefly/wechat-cli) -聊天记录 · 联系人 · 会话 · 收藏 · 统计 · 导出 +聊天记录 · 合并记录展开 · 联系人 · 会话 · 收藏 · 统计 · 导出 [English](README.md) @@ -19,7 +19,7 @@ ## ✨ 功能亮点 - **🚀 开箱即用** — `npm install -g` 一键安装,无需 Python -- **📦 11 个命令** — sessions、history、search、contacts、members、stats、export、favorites、unread、new-messages、init +- **📦 12 个命令** — sessions、history、records、search、contacts、members、stats、export、favorites、unread、new-messages、init - **🤖 AI 优先** — 默认 JSON 输出,专为 LLM Agent 工具调用设计 - **🔒 全程本地** — SQLCipher 即时解密,数据不出本机 - **📊 丰富统计** — 发言排行、消息类型分布、24 小时活跃图 @@ -151,6 +151,7 @@ EOF ```bash wechat-cli sessions # 最近会话 wechat-cli history "张三" --limit 20 # 聊天记录 +wechat-cli records "张三" --format text # 展开合并聊天记录 wechat-cli search "截止日期" --chat "项目组" # 搜索消息 ``` @@ -172,6 +173,7 @@ WeChat CLI 专为 AI Agent 设计,所有命令默认输出结构化 JSON。 常用命令: - `wechat-cli sessions --limit 10` — 列出最近会话 - `wechat-cli history "名称" --limit 20 --format text` — 读取聊天记录 +- `wechat-cli records "名称" --limit 5 --format text` — 展开合并聊天记录 - `wechat-cli search "关键词" --chat "聊天名"` — 搜索消息 - `wechat-cli contacts --query "名称"` — 搜索联系人 - `wechat-cli unread` — 显示未读会话 @@ -196,6 +198,9 @@ wechat-cli sessions --limit 5 # 读取指定聊天 wechat-cli history "张三" --limit 30 --format text +# 展开合并聊天记录 +wechat-cli records "张三" --limit 5 --format text + # 带过滤条件搜索 wechat-cli search "报告" --type file --limit 10 @@ -227,6 +232,19 @@ wechat-cli history "张三" --format text **选项:** `--limit`、`--offset`、`--start-time`、`--end-time`、`--type`、`--format` +### `records` — 合并聊天记录展开 + +展开微信“合并聊天记录”卡片里的内层消息。 +嵌套的合并聊天记录会自动递归展开。 + +```bash +wechat-cli records "张三" # 最近 20 条合并聊天记录 +wechat-cli records "张三" --limit 5 --format text +wechat-cli records "群聊" --start-time "2026-04-01" --output records.json +``` + +**选项:** `--limit`、`--offset`、`--scan-limit`、`--start-time`、`--end-time`、`--format`、`--output` + ### `search` — 搜索消息 ```bash diff --git a/wechat_cli/commands/records.py b/wechat_cli/commands/records.py new file mode 100644 index 0000000..15d5592 --- /dev/null +++ b/wechat_cli/commands/records.py @@ -0,0 +1,132 @@ +"""records 命令 — 展开微信合并聊天记录""" + +import click + +from ..core.contacts import get_contact_names +from ..core.messages import ( + collect_forwarded_records, + parse_time_range, + resolve_chat_context, + validate_pagination, +) +from ..output.formatter import output + + +@click.command("records") +@click.argument("chat_name") +@click.option("--limit", default=20, help="返回的合并聊天记录数量") +@click.option("--offset", default=0, help="分页偏移量") +@click.option("--scan-limit", default=5000, help="最多扫描的链接/文件消息数量") +@click.option("--start-time", default="", help="起始时间 YYYY-MM-DD [HH:MM[:SS]]") +@click.option("--end-time", default="", help="结束时间 YYYY-MM-DD [HH:MM[:SS]]") +@click.option("--format", "fmt", default="json", type=click.Choice(["json", "text"]), help="输出格式") +@click.option("--output", "output_path", default=None, help="输出文件路径(默认输出到 stdout)") +@click.pass_context +def records(ctx, chat_name, limit, offset, scan_limit, start_time, end_time, fmt, output_path): + """展开指定聊天里的“合并聊天记录”卡片 + + \b + 示例: + wechat-cli records "张三" # 展开最近 20 条合并聊天记录 + wechat-cli records "AI交流群" --limit 5 --format text + wechat-cli records "张三" --start-time "2026-04-01" --output records.txt + """ + app = ctx.obj + + try: + validate_pagination(limit, offset, limit_max=None) + if scan_limit <= 0: + raise ValueError("scan-limit 必须大于 0") + start_ts, end_ts = parse_time_range(start_time, end_time) + except ValueError as e: + click.echo(f"错误: {e}", err=True) + ctx.exit(2) + + chat_ctx = resolve_chat_context(chat_name, app.msg_db_keys, app.cache, app.decrypted_dir) + if not chat_ctx: + click.echo(f"找不到聊天对象: {chat_name}", err=True) + ctx.exit(1) + if not chat_ctx['db_path']: + click.echo(f"找不到 {chat_ctx['display_name']} 的消息记录", err=True) + ctx.exit(1) + + names = get_contact_names(app.cache, app.decrypted_dir) + records_data, failures = collect_forwarded_records( + chat_ctx, names, app.display_name_fn, + start_ts=start_ts, end_ts=end_ts, limit=limit, offset=offset, + scan_limit=scan_limit, + ) + + if fmt == "json": + result = { + 'chat': chat_ctx['display_name'], + 'username': chat_ctx['username'], + 'is_group': chat_ctx['is_group'], + 'count': len(records_data), + 'offset': offset, + 'limit': limit, + 'scan_limit': scan_limit, + 'start_time': start_time or None, + 'end_time': end_time or None, + 'records': records_data, + 'failures': failures if failures else None, + } + if output_path: + with open(output_path, 'w', encoding='utf-8') as f: + output(result, 'json', file=f) + click.echo(f"已导出到: {output_path}({len(records_data)} 条合并聊天记录)", err=True) + else: + output(result, 'json') + return + + content = _format_text(chat_ctx['display_name'], records_data, failures, offset, limit) + if output_path: + with open(output_path, 'w', encoding='utf-8') as f: + f.write(content) + if not content.endswith('\n'): + f.write('\n') + click.echo(f"已导出到: {output_path}({len(records_data)} 条合并聊天记录)", err=True) + else: + output(content, 'text') + + +def _format_text(display_name, records_data, failures, offset, limit): + header = f"{display_name} 的合并聊天记录(返回 {len(records_data)} 条,offset={offset}, limit={limit})" + if failures: + header += "\n查询失败: " + ";".join(failures) + if not records_data: + return header + "\n\n无合并聊天记录" + + chunks = [header] + for record in records_data: + chunks.append("=" * 80) + chunks.append(f"[{record['time']}] local_id={record['local_id']}") + if record.get('sender'): + chunks.append(f"发送者: {record['sender']}") + if record.get('title'): + chunks.append(f"标题: {record['title']}") + if record.get('summary'): + chunks.append(f"摘要: {record['summary']}") + count_line = f"展开条数: {record['item_count']}" + if record.get('expanded_item_count') and record['expanded_item_count'] != record['item_count']: + count_line += f"(含嵌套 {record['expanded_item_count']} 条)" + chunks.append(count_line) + chunks.append("") + for item in record['items']: + chunks.extend(_format_item_lines(item)) + chunks.append("") + return "\n".join(chunks) + + +def _format_item_lines(item, label=None, indent=0): + label = label or f"{item['index']:03d}" + prefix = " " * indent + continuation = "\n" + prefix + " " + text = item['text'].replace("\n", continuation) + time = item['time'] or "未知时间" + source = item['source'] or "未知发送者" + lines = [f"{prefix}{label}. [{time}] {source} ({item['type']}): {text}"] + for child in item.get('children') or []: + child_label = f"{label}.{child['index']:03d}" + lines.extend(_format_item_lines(child, label=child_label, indent=indent + 1)) + return lines diff --git a/wechat_cli/core/messages.py b/wechat_cli/core/messages.py index d62ef33..d47530f 100644 --- a/wechat_cli/core/messages.py +++ b/wechat_cli/core/messages.py @@ -1,6 +1,7 @@ """消息查询 — 分表查找、分页、格式化""" import hashlib +import html import os import re import sqlite3 @@ -14,9 +15,13 @@ _zstd_dctx = zstd.ZstdDecompressor() _XML_UNSAFE_RE = re.compile(r'])') _XML_PARSE_MAX_LEN = 20000 _QUERY_LIMIT_MAX = 500 _HISTORY_QUERY_BATCH_SIZE = 500 +_FORWARDED_RECORD_QUERY_BATCH_SIZE = 500 +_FORWARDED_RECORD_SCAN_LIMIT_DEFAULT = 5000 +_FORWARDED_RECORD_MAX_DEPTH = 4 # 消息类型过滤映射: 名称 -> (base_type,) 或 (base_type, sub_type) MSG_TYPE_FILTERS = { @@ -33,6 +38,15 @@ } MSG_TYPE_NAMES = list(MSG_TYPE_FILTERS.keys()) +FORWARDED_RECORD_TYPE_LABELS = { + '1': '文本', + '2': '图片', + '5': '链接', + '19': '合并聊天记录', + '17': '位置/分享', + '37': '引用/小程序', +} + # ---- 消息 DB 发现 ---- @@ -133,6 +147,83 @@ def _collapse_text(text): return re.sub(r'\s+', ' ', text).strip() +def _clean_record_text(text): + if not text: + return '' + text = text.replace('', '') + text = html.unescape(text) + text = text.replace('\r\n', '\n').replace('\r', '\n') + lines = [re.sub(r'[ \t]+', ' ', line).strip() for line in text.split('\n')] + return '\n'.join(line for line in lines if line).strip() + + +def _extract_tag_text(block, tag): + match = re.search(rf'<{tag}[^>]*>(.*?)', block or '', re.S) + return _clean_record_text(match.group(1)) if match else '' + + +def _extract_attr_text(attrs, name): + match = re.search(rf'{name}="([^"]*)"', attrs or '') + return html.unescape(match.group(1)) if match else '' + + +def _contains_forwarded_record_markup(text): + return bool(text and '', open_pos) + if open_end == -1: + break + + attrs = content[open_pos + len(' 0: + next_match = _DATAITEM_OPEN_RE.search(content, cursor) + next_open = next_match.start() if next_match else -1 + next_close = content.find(close_tag, cursor) + if next_close == -1: + return + if next_open != -1 and next_open < next_close: + nested_open_end = content.find('>', next_open) + if nested_open_end == -1: + return + depth += 1 + cursor = nested_open_end + 1 + continue + + depth -= 1 + if depth == 0: + yield attrs, content[open_end + 1:next_close] + pos = next_close + len(close_tag) + break + cursor = next_close + len(close_tag) + + +def _extract_nested_forwarded_record_payload(body): + decoded = html.unescape(body or '') + if _contains_forwarded_record_markup(decoded): + return decoded + return '' + + +def _count_forwarded_record_items(items): + total = 0 + for item in items: + total += 1 + total += _count_forwarded_record_items(item.get('children') or []) + return total + + def _parse_xml_root(content): if not content or len(content) > _XML_PARSE_MAX_LEN or _XML_UNSAFE_RE.search(content): return None @@ -149,6 +240,68 @@ def _parse_int(value, fallback=0): return fallback +def _format_record_item_time(value): + value = (value or '').strip() + if not value: + return '' + try: + return datetime.fromtimestamp(int(value)).strftime('%Y-%m-%d %H:%M:%S') + except (TypeError, ValueError, OSError, OverflowError): + return value + + +def parse_forwarded_record_items(content, depth=0, max_depth=_FORWARDED_RECORD_MAX_DEPTH): + """解析微信“合并聊天记录”卡片内的 recorditem/dataitem。 + + 这类消息通常是 type=49/subtype=19 的 appmsg。普通 history/export 只能 + 展示外层摘要;这里展开本地已缓存的 dataitem 文本,并自动递归解析嵌套 + 的合并聊天记录。 + """ + if not content: + return [] + if '= candidate_limit: + break + except Exception as e: + failures.append(f"local_id={row[0]}: {e}") + if len(rows) < batch_size: + break + except Exception as e: + failures.append(f"{table_ctx['db_path']}: {e}") + + paged = _page_ranked_entries(collected, limit, offset) + return [record for _, record in paged], failures + + # ---- 搜索查询 ---- def _collect_search_entries(conn, contexts, names, keyword, display_name_fn, start_ts=None, end_ts=None, candidate_limit=20, msg_type_filter=None): diff --git a/wechat_cli/main.py b/wechat_cli/main.py index 3400c06..7a3eafc 100644 --- a/wechat_cli/main.py +++ b/wechat_cli/main.py @@ -24,6 +24,7 @@ def cli(ctx, config_path): wechat-cli sessions --limit 10 # 最近 10 个会话 wechat-cli history "张三" --limit 20 # 查看张三的最近 20 条消息 wechat-cli history "AI交流群" --start-time "2026-04-01" # 指定时间范围 + wechat-cli records "张三" --format text # 展开合并聊天记录 wechat-cli search "Claude" --chat "AI交流群" # 在指定群里搜索关键词 wechat-cli search "你好" --limit 50 # 全局搜索 wechat-cli contacts --query "李" # 搜索联系人 @@ -55,6 +56,7 @@ def cli(ctx, config_path): from .commands.stats import stats from .commands.unread import unread from .commands.favorites import favorites +from .commands.records import records cli.add_command(init) cli.add_command(sessions) @@ -67,6 +69,7 @@ def cli(ctx, config_path): cli.add_command(stats) cli.add_command(unread) cli.add_command(favorites) +cli.add_command(records) if __name__ == "__main__":