Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ sa.json

# Local files
*.txt
*.log
*.log

# Agents
.gemini/*
.superpowers/*
45 changes: 30 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,30 +44,45 @@ uv run monitor.py

## 2. Twitch Downloader (`twitch_download.py`)

Download VODs from Twitch using `yt-dlp` (optimized for archiving).
Download VODs from Twitch using `yt-dlp` and `chat_utils`.

### Usage
```bash
uv run twitch_download.py "TWITCH_VOD_URL"
```

### Features
- **Optimized Video**: Downloads `480p` (or best ≤ 480p) to `original.mp4` (small size for processing).
- **Direct Audio**: Downloads `Audio Only` stream directly to `audio.mp4` (no re-encoding if possible).
- **High Speed**: Uses 10 concurrent threads for downloading.
- **Optimized Video**: Downloads `480p` (or best ≤ 480p) to `original.mp4`.
- **Chat Download**: Automatically fetches full Twitch chat logs using the GQL API (bypasses 404 errors).
- **Direct Audio**: Extracts audio directly to `audio.mp4`.
- **Auto-Pipeline**:
1. Downloads Video & Audio.
1. Downloads Video, Audio, and Chat.
2. Transcribes Audio (generating `transcript.srt`).
3. Splits SRT by hour.
4. Triggers N8N workflow (`analyze`).
3. Triggers N8N workflow (`analyze`).

### Options
- `--root_dir`: Base directory.
- `--audio` / `--no-audio`: Toggle audio download.
---

## 3. Chat Downloader (`chat_utils.py`)

A robust Twitch chat downloader using the modern GQL API.

### Usage
```bash
# Download entire chat
uv run python chat_utils.py "TWITCH_VOD_URL" chat.json

# Download specific range (e.g., first hour)
uv run python chat_utils.py "TWITCH_VOD_URL" chat.json 0 60
```

### Features
- **GQL Powered**: Uses Twitch's internal GraphQL API for high reliability.
- **Range Support**: Can jump to any timestamp using the optional `start_min` and `duration_min` positional arguments.
- **Format Compatible**: Produces a structured JSON chat log.

---

## 3. YouTube Downloader (`yt_download.py`)
## 4. YouTube Downloader (`yt_download.py`)

Download videos from YouTube.

Expand All @@ -84,7 +99,7 @@ uv run yt_download.py "YOUTUBE_URL"

---

## 4. Transcription Tool (`transcript.py`)
## 5. Transcription Tool (`transcript.py`)

Generate SRT subtitles using **AssemblyAI** (default) or **faster-whisper**, with translation support using **Google Translate** (default) or **Ollama**.

Expand All @@ -110,7 +125,7 @@ uv run transcript.py /path/to/video.mp4 --zh_output "zh.srt"

---

## 5. Generic Translator (`translate.py`)
## 6. Generic Translator (`translate.py`)

Simple CLI tool to translate text/files using Google Translate.

Expand All @@ -125,7 +140,7 @@ uv run translate.py path/to/file.txt

---

## 6. Batch Clipper (`crop.py`)
## 7. Batch Clipper (`crop.py`)

Process a long video into multiple clips based on a list.

Expand All @@ -142,7 +157,7 @@ uv run crop.py /path/to/RootFolder

---

## 7. Interactive Crop UI (`main.py`)
## 8. Interactive Crop UI (`main.py`)

Visual tool to determine FFmpeg crop parameters.

Expand Down
146 changes: 146 additions & 0 deletions chat_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import requests
import json
import os
import time
import sys

def download_chat(video_url_or_id, output_path, start_min=None, duration_min=None):
    """
    Download Twitch chat logs for a VOD via the GQL API (same persisted
    query used by lay295/TwitchDownloader), following pagination cursors.

    Args:
        video_url_or_id: Full VOD URL (e.g. "https://www.twitch.tv/videos/123")
            or a bare numeric video ID. Query strings/fragments are tolerated.
        output_path: Destination path for the JSON chat log.
        start_min: Optional start offset in minutes (default: beginning).
        duration_min: Optional duration in minutes (default: until the end).

    Returns:
        True if at least one comment was downloaded and saved, else False.
    """
    # Extract the numeric video ID. Strip any query string / fragment first
    # so URLs like ".../videos/123?t=1h2m30s" resolve correctly (the original
    # extraction left "?t=..." attached and failed the isdigit() check).
    video_id = video_url_or_id.split("?")[0].split("#")[0]
    if "/" in video_id:
        video_id = video_id.rstrip("/").split("/")[-1]

    if not video_id.isdigit():
        print(f"Error: Could not extract a valid video ID from {video_url_or_id}")
        return False

    url = "https://gql.twitch.tv/gql"
    # Public web client ID and persisted-query hash used by the Twitch
    # website itself; no OAuth token is required to read VOD comments.
    client_id = "kd1unb4b3q4t58fwlpcbzcbnm76a8fp"
    sha256_hash = "b70a3591ff0f4e0313d126c6a1502d79a1c02baebb288227c582044aa76adf6a"

    headers = {
        "Client-Id": client_id,
        "Content-Type": "application/json"
    }

    start_seconds = (start_min * 60) if start_min is not None else 0
    end_seconds = (start_seconds + (duration_min * 60)) if duration_min is not None else float('inf')

    all_comments = []
    cursor = None

    print(f"[*] Starting chat download for video {video_id}...")
    if start_min is not None:
        print(f"[*] Filtering for range: {start_min}m to {start_min + (duration_min or 0)}m")

    while True:
        variables = {
            "videoID": video_id
        }

        if cursor:
            variables["cursor"] = cursor
        else:
            # First request seeks straight to the requested offset; subsequent
            # pages are addressed by cursor only.
            variables["contentOffsetSeconds"] = start_seconds

        payload = [{
            "operationName": "VideoCommentsByOffsetOrCursor",
            "variables": variables,
            "extensions": {
                "persistedQuery": {
                    "version": 1,
                    "sha256Hash": sha256_hash
                }
            }
        }]

        try:
            response = requests.post(url, json=payload, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
            if isinstance(data, list):
                data = data[0]

            if "errors" in data:
                print(f"[!] GQL Error: {data['errors']}")
                break

            # "video" (and "comments" below) may come back as explicit JSON
            # null for deleted/invalid VODs; `or {}` guards the .get() calls.
            video_data = data.get("data", {}).get("video") or {}
            if not video_data:
                print("[!] No video data found in response.")
                break

            comments_data = video_data.get("comments") or {}
            edges = comments_data.get("edges", [])

            if not edges:
                break

            last_offset = 0
            passed_end = False
            for edge in edges:
                node = edge.get("node", {})
                offset = node.get("contentOffsetSeconds", 0)
                last_offset = offset

                # Stop once we move past the requested window (if any).
                if offset > end_seconds:
                    passed_end = True
                    break

                if offset >= start_seconds:
                    # Map to the rechat-like format expected by analyzer.py.
                    all_comments.append({
                        "content_offset_seconds": offset,
                        "message": node.get("message", {}),
                        "commenter": node.get("commenter", {})
                    })

            if passed_end:
                break

            # Advance to the next page, if any.
            page_info = comments_data.get("pageInfo", {})
            if page_info.get("hasNextPage"):
                cursor = edges[-1].get("cursor")
                # Simple single-line progress report.
                print(f"[*] Collected {len(all_comments)} comments... (Current time: {int(last_offset//60)}m)", end='\r')
            else:
                break

            # Polite pause so we do not hammer the endpoint.
            time.sleep(0.1)

        except Exception as e:
            print(f"\n[!] Error during download: {e}")
            # Could implement retry here
            break

    print(f"\n[*] Download complete. Total comments: {len(all_comments)}")

    if not all_comments:
        print("[!] No comments found for the specified range.")
        return False

    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(all_comments, f, ensure_ascii=False, indent=2)
        print(f"[*] Chat log saved to {output_path}")
        return True
    except Exception as e:
        print(f"[!] Error saving chat file: {e}")
        return False

if __name__ == "__main__":
    # CLI: chat_utils.py <video_url_or_id> <output_path> [start_min] [duration_min]
    args = sys.argv[1:]
    if len(args) < 2:
        print("Usage: python chat_utils.py <video_url_or_id> <output_path> [start_min] [duration_min]")
    else:
        start = int(args[2]) if len(args) >= 3 else None
        duration = int(args[3]) if len(args) >= 4 else None
        download_chat(args[0], args[1], start, duration)
27 changes: 22 additions & 5 deletions crop.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import os
import re
import argparse


from transcript import transcribe_video
import ffmpeg
from facecam_utils import detect_facecam

# ================= 配置區域 =================
INPUT_FILE_NAME = "original.mp4"
Expand Down Expand Up @@ -190,22 +189,40 @@ def process(root_dir, crop_cam, crop_screen, start_arg=None, end_arg=None):
with open(os.path.join(output_folder, "metadata.md"), "w", encoding="utf-8") as f:
f.write(final_metadata)

def resolve_cam_param(root_dir, cam_arg):
    """Resolve the camera crop parameter.

    Returns cam_arg unchanged unless it is the literal string 'auto', in
    which case ML facecam detection is run against the input video; falls
    back to DEFAULT_CROP_CAM when the video is missing or detection fails.
    """
    if cam_arg != "auto":
        return cam_arg

    video_path = os.path.join(root_dir, INPUT_FILE_NAME)
    if not os.path.exists(video_path):
        print(f"Warning: {video_path} not found. Using default: {DEFAULT_CROP_CAM}")
        return DEFAULT_CROP_CAM

    print("Auto-detecting facecam (ML)...")
    result = detect_facecam(video_path)
    if not result:
        print(f"Detection failed. Using default: {DEFAULT_CROP_CAM}")
        return DEFAULT_CROP_CAM

    print(f"Detected: {result}")
    return result

def main():
    """Entry point: parse CLI arguments, resolve the cam crop, and run processing."""
    parser = argparse.ArgumentParser(description="自動剪輯工具")
    parser.add_argument("root_dir", help="包含 crop_info.md 和 original.mp4 的根目錄路徑")
    parser.add_argument("--cam", default=DEFAULT_CROP_CAM, help=f"Camera crop parameter (default: {DEFAULT_CROP_CAM}). Use 'auto' for ML detection.")
    parser.add_argument("--screen", default=DEFAULT_CROP_SCREEN, help=f"Screen crop parameter (default: {DEFAULT_CROP_SCREEN})")
    parser.add_argument("--start", help="Start time (e.g. 00:00:10). usage with --end")
    parser.add_argument("--end", help="End time (e.g. 00:00:20). usage with --start")
    args = parser.parse_args()

    # --start and --end are only meaningful as a pair.
    if bool(args.start) != bool(args.end):
        print("錯誤: --start 和 --end 必須同時提供")
        return

    process(
        args.root_dir,
        resolve_cam_param(args.root_dir, args.cam),
        args.screen,
        start_arg=args.start,
        end_arg=args.end,
    )

if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions crop_info.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Shorts Number,Start Timestamp,End Timestamp,Selling Point,Suggested Title/Hook (Including Hashtags),SEO Subtitle
9,02:14:27,02:16:40,"Reaction to a woman complaining about a date who 'psychoanalyzed' her and correctly guessed she was on anxiety meds. Streamer sides with the guy.","Her Date Had Her ALL Figured Out! 😂 #dating #anxiety #funny #reaction","Streamer reacts to woman complaining about a date who psychoanalyzed her"
Loading