Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ sa.json

# Local files
*.txt
*.log
*.log

# Agents
.gemini/*
.superpowers/*
45 changes: 30 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,30 +44,45 @@ uv run monitor.py

## 2. Twitch Downloader (`twitch_download.py`)

Download VODs from Twitch using `yt-dlp` (optimized for archiving).
Download VODs from Twitch using `yt-dlp` and `chat_utils`.

### Usage
```bash
uv run twitch_download.py "TWITCH_VOD_URL"
```

### Features
- **Optimized Video**: Downloads `480p` (or best ≤ 480p) to `original.mp4` (small size for processing).
- **Direct Audio**: Downloads `Audio Only` stream directly to `audio.mp4` (no re-encoding if possible).
- **High Speed**: Uses 10 concurrent threads for downloading.
- **Optimized Video**: Downloads `480p` (or best ≤ 480p) to `original.mp4`.
- **Chat Download**: Automatically fetches full Twitch chat logs using the GQL API (bypasses 404 errors).
- **Direct Audio**: Extracts audio directly to `audio.mp4`.
- **Auto-Pipeline**:
1. Downloads Video & Audio.
1. Downloads Video, Audio, and Chat.
2. Transcribes Audio (generating `transcript.srt`).
3. Splits SRT by hour.
4. Triggers N8N workflow (`analyze`).
3. Triggers N8N workflow (`analyze`).

### Options
- `--root_dir`: Base directory.
- `--audio` / `--no-audio`: Toggle audio download.
---

## 3. Chat Downloader (`chat_utils.py`)

A robust Twitch chat downloader using the modern GQL API.

### Usage
```bash
# Download entire chat
uv run python chat_utils.py "TWITCH_VOD_URL" chat.json

# Download specific range (e.g., first hour)
uv run python chat_utils.py "TWITCH_VOD_URL" chat.json 0 60
```

### Features
- **GQL Powered**: Uses Twitch's internal GraphQL API for high reliability.
- **Range Support**: Can jump to any timestamp using the optional `start_min` and `duration_min` positional arguments.
- **Format Compatible**: Produces a structured JSON chat log.

---

## 3. YouTube Downloader (`yt_download.py`)
## 4. YouTube Downloader (`yt_download.py`)

Download videos from YouTube.

Expand All @@ -84,7 +99,7 @@ uv run yt_download.py "YOUTUBE_URL"

---

## 4. Transcription Tool (`transcript.py`)
## 5. Transcription Tool (`transcript.py`)

Generate SRT subtitles using **AssemblyAI** (default) or **faster-whisper**, with translation support using **Google Translate** (default) or **Ollama**.

Expand All @@ -110,7 +125,7 @@ uv run transcript.py /path/to/video.mp4 --zh_output "zh.srt"

---

## 5. Generic Translator (`translate.py`)
## 6. Generic Translator (`translate.py`)

Simple CLI tool to translate text/files using Google Translate.

Expand All @@ -125,7 +140,7 @@ uv run translate.py path/to/file.txt

---

## 6. Batch Clipper (`crop.py`)
## 7. Batch Clipper (`crop.py`)

Process a long video into multiple clips based on a list.

Expand All @@ -142,7 +157,7 @@ uv run crop.py /path/to/RootFolder

---

## 7. Interactive Crop UI (`main.py`)
## 8. Interactive Crop UI (`main.py`)

Visual tool to determine FFmpeg crop parameters.

Expand Down
146 changes: 146 additions & 0 deletions chat_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import requests
import json
import os
import time
import sys

def download_chat(video_url_or_id, output_path, start_min=None, duration_min=None):
    """
    Download Twitch chat logs for a VOD via the GQL API (same persisted
    query used by lay295/TwitchDownloader), following pagination cursors.

    Args:
        video_url_or_id: Full VOD URL (e.g. "https://www.twitch.tv/videos/123")
            or a bare numeric video ID. Query strings/fragments are tolerated.
        output_path: Destination path for the JSON chat log.
        start_min: Optional start offset in minutes (default: beginning).
        duration_min: Optional duration in minutes (default: until the end).

    Returns:
        True if at least one comment was downloaded and saved, else False.
    """
    # Extract the numeric video ID. Strip any query string / fragment first
    # so URLs like ".../videos/123?t=1h2m30s" resolve correctly (the original
    # extraction left "?t=..." attached and failed the isdigit() check).
    video_id = video_url_or_id.split("?")[0].split("#")[0]
    if "/" in video_id:
        video_id = video_id.rstrip("/").split("/")[-1]

    if not video_id.isdigit():
        print(f"Error: Could not extract a valid video ID from {video_url_or_id}")
        return False

    url = "https://gql.twitch.tv/gql"
    # Public web client ID and persisted-query hash used by the Twitch
    # website itself; no OAuth token is required to read VOD comments.
    client_id = "kd1unb4b3q4t58fwlpcbzcbnm76a8fp"
    sha256_hash = "b70a3591ff0f4e0313d126c6a1502d79a1c02baebb288227c582044aa76adf6a"

    headers = {
        "Client-Id": client_id,
        "Content-Type": "application/json"
    }

    start_seconds = (start_min * 60) if start_min is not None else 0
    end_seconds = (start_seconds + (duration_min * 60)) if duration_min is not None else float('inf')

    all_comments = []
    cursor = None

    print(f"[*] Starting chat download for video {video_id}...")
    if start_min is not None:
        print(f"[*] Filtering for range: {start_min}m to {start_min + (duration_min or 0)}m")

    while True:
        variables = {
            "videoID": video_id
        }

        if cursor:
            variables["cursor"] = cursor
        else:
            # First request seeks straight to the requested offset; subsequent
            # pages are addressed by cursor only.
            variables["contentOffsetSeconds"] = start_seconds

        payload = [{
            "operationName": "VideoCommentsByOffsetOrCursor",
            "variables": variables,
            "extensions": {
                "persistedQuery": {
                    "version": 1,
                    "sha256Hash": sha256_hash
                }
            }
        }]

        try:
            response = requests.post(url, json=payload, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
            if isinstance(data, list):
                data = data[0]

            if "errors" in data:
                print(f"[!] GQL Error: {data['errors']}")
                break

            # "video" (and "comments" below) may come back as explicit JSON
            # null for deleted/invalid VODs; `or {}` guards the .get() calls.
            video_data = data.get("data", {}).get("video") or {}
            if not video_data:
                print("[!] No video data found in response.")
                break

            comments_data = video_data.get("comments") or {}
            edges = comments_data.get("edges", [])

            if not edges:
                break

            last_offset = 0
            passed_end = False
            for edge in edges:
                node = edge.get("node", {})
                offset = node.get("contentOffsetSeconds", 0)
                last_offset = offset

                # Stop once we move past the requested window (if any).
                if offset > end_seconds:
                    passed_end = True
                    break

                if offset >= start_seconds:
                    # Map to the rechat-like format expected by analyzer.py.
                    all_comments.append({
                        "content_offset_seconds": offset,
                        "message": node.get("message", {}),
                        "commenter": node.get("commenter", {})
                    })

            if passed_end:
                break

            # Advance to the next page, if any.
            page_info = comments_data.get("pageInfo", {})
            if page_info.get("hasNextPage"):
                cursor = edges[-1].get("cursor")
                # Simple single-line progress report.
                print(f"[*] Collected {len(all_comments)} comments... (Current time: {int(last_offset//60)}m)", end='\r')
            else:
                break

            # Polite pause so we do not hammer the endpoint.
            time.sleep(0.1)

        except Exception as e:
            print(f"\n[!] Error during download: {e}")
            # Could implement retry here
            break

    print(f"\n[*] Download complete. Total comments: {len(all_comments)}")

    if not all_comments:
        print("[!] No comments found for the specified range.")
        return False

    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(all_comments, f, ensure_ascii=False, indent=2)
        print(f"[*] Chat log saved to {output_path}")
        return True
    except Exception as e:
        print(f"[!] Error saving chat file: {e}")
        return False

if __name__ == "__main__":
    # CLI: chat_utils.py <video_url_or_id> <output_path> [start_min] [duration_min]
    args = sys.argv[1:]
    if len(args) < 2:
        print("Usage: python chat_utils.py <video_url_or_id> <output_path> [start_min] [duration_min]")
    else:
        start = int(args[2]) if len(args) >= 3 else None
        duration = int(args[3]) if len(args) >= 4 else None
        download_chat(args[0], args[1], start, duration)
27 changes: 22 additions & 5 deletions crop.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import os
import re
import argparse


from transcript import transcribe_video
import ffmpeg
from facecam_utils import detect_facecam

# ================= 配置區域 =================
INPUT_FILE_NAME = "original.mp4"
Expand Down Expand Up @@ -190,22 +189,40 @@ def process(root_dir, crop_cam, crop_screen, start_arg=None, end_arg=None):
with open(os.path.join(output_folder, "metadata.md"), "w", encoding="utf-8") as f:
f.write(final_metadata)

def resolve_cam_param(root_dir, cam_arg):
    """Resolve the camera crop parameter.

    Returns cam_arg unchanged unless it is the literal string 'auto', in
    which case ML facecam detection is run against the input video; falls
    back to DEFAULT_CROP_CAM when the video is missing or detection fails.
    """
    if cam_arg != "auto":
        return cam_arg

    video_path = os.path.join(root_dir, INPUT_FILE_NAME)
    if not os.path.exists(video_path):
        print(f"Warning: {video_path} not found. Using default: {DEFAULT_CROP_CAM}")
        return DEFAULT_CROP_CAM

    print("Auto-detecting facecam (ML)...")
    result = detect_facecam(video_path)
    if not result:
        print(f"Detection failed. Using default: {DEFAULT_CROP_CAM}")
        return DEFAULT_CROP_CAM

    print(f"Detected: {result}")
    return result

def main():
    """Entry point: parse CLI arguments, resolve the cam crop, and run processing."""
    parser = argparse.ArgumentParser(description="自動剪輯工具")
    parser.add_argument("root_dir", help="包含 crop_info.md 和 original.mp4 的根目錄路徑")
    parser.add_argument("--cam", default=DEFAULT_CROP_CAM, help=f"Camera crop parameter (default: {DEFAULT_CROP_CAM}). Use 'auto' for ML detection.")
    parser.add_argument("--screen", default=DEFAULT_CROP_SCREEN, help=f"Screen crop parameter (default: {DEFAULT_CROP_SCREEN})")
    parser.add_argument("--start", help="Start time (e.g. 00:00:10). usage with --end")
    parser.add_argument("--end", help="End time (e.g. 00:00:20). usage with --start")
    args = parser.parse_args()

    # --start and --end are only meaningful as a pair.
    if bool(args.start) != bool(args.end):
        print("錯誤: --start 和 --end 必須同時提供")
        return

    process(
        args.root_dir,
        resolve_cam_param(args.root_dir, args.cam),
        args.screen,
        start_arg=args.start,
        end_arg=args.end,
    )

if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions crop_info.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Shorts Number,Start Timestamp,End Timestamp,Selling Point,Suggested Title/Hook (Including Hashtags),SEO Subtitle
9,02:14:27,02:16:40,"Reaction to a woman complaining about a date who 'psychoanalyzed' her and correctly guessed she was on anxiety meds. Streamer sides with the guy.","Her Date Had Her ALL Figured Out! 😂 #dating #anxiety #funny #reaction","Streamer reacts to woman complaining about a date who psychoanalyzed her"
Loading