-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathyoutube_mp3_cli.py
More file actions
executable file
·131 lines (106 loc) · 4.15 KB
/
youtube_mp3_cli.py
File metadata and controls
executable file
·131 lines (106 loc) · 4.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
"""
YouTube MP3 Transcription CLI
A simple command-line interface for transcribing YouTube videos to text.
"""
import argparse
import sys
from pathlib import Path
# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from src.youtube_mp3_service import YouTubeMP3TranscriptionService
def _build_parser():
    """Build the command-line argument parser for the transcription CLI."""
    parser = argparse.ArgumentParser(
        description="Download MP3 from YouTube and transcribe with Google Gemini",
        epilog="""
Examples:
  %(prog)s "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
  %(prog)s "https://youtu.be/9bZkp7q19f0" --keep-mp3
  %(prog)s "https://www.youtube.com/watch?v=example" --output transcript.txt
        """,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "url",
        help="YouTube URL to transcribe",
    )
    parser.add_argument(
        "--keep-mp3",
        action="store_true",
        help="Keep the downloaded MP3 file after transcription",
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Output file to save the transcript (default: print to stdout)",
    )
    parser.add_argument(
        "--info-only",
        action="store_true",
        help="Only get video information without transcription",
    )
    parser.add_argument(
        "--chunk-duration",
        type=int,
        # None means "not given on the command line", so an explicit value
        # (including 300) is always honoured instead of being mistaken for
        # the default and silently dropped.
        default=None,
        help="Maximum chunk duration in seconds (default: 300)",
    )
    return parser


def _print_video_info(video_info):
    """Print the title, id, duration and uploader from a video-info mapping."""
    # `.get('duration', 0)` only covers a missing key; fall back to 0 as well
    # when the key is present but None, otherwise the division below raises
    # TypeError. NOTE(review): presumably the backend can report None for
    # videos without a known length -- confirm against the service.
    duration = video_info.get('duration') or 0
    print(f"📝 Title: {video_info.get('title', 'Unknown')}")
    print(f"🆔 Video ID: {video_info.get('id', 'Unknown')}")
    print(f"⏱️ Duration: {duration} seconds ({duration / 60:.1f} minutes)")
    print(f"👤 Uploader: {video_info.get('uploader', 'Unknown')}")


def _emit_transcript(transcript, output_path):
    """Write the transcript to ``output_path``, or print it framed on stdout."""
    if output_path:
        # Save to file
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(transcript or '')
        print(f"💾 Transcript saved to: {output_path}")
        return

    # Print to stdout
    rule = "=" * 60
    print("\n" + rule)
    print("📖 TRANSCRIPT:")
    print(rule)
    print(transcript or "No transcript generated")
    print(rule)


def main():
    """Run the YouTube MP3 transcription command-line interface.

    Parses ``sys.argv``, validates the URL with the transcription service,
    prints the video metadata and, unless ``--info-only`` is given, downloads
    and transcribes the audio. The transcript is written to ``--output`` when
    provided, otherwise printed to stdout.

    Exit status:
        0 -- success (including ``--info-only``).
        1 -- invalid URL, failed transcription, unexpected error, or Ctrl-C.
        2 -- command-line usage error (raised by argparse), including a
             non-positive ``--chunk-duration``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Reject nonsensical chunk sizes up front, before any service work starts.
    if args.chunk_duration is not None and args.chunk_duration <= 0:
        parser.error("--chunk-duration must be a positive number of seconds")

    try:
        # Initialize the service
        print("🔄 Initializing YouTube MP3 transcription service...")
        service = YouTubeMP3TranscriptionService()

        # Set custom chunk duration if specified
        if args.chunk_duration is not None:
            service.max_chunk_duration = args.chunk_duration
            print(f"⚙️ Set chunk duration to {args.chunk_duration} seconds")

        # Validate URL
        if not service.is_youtube_url(args.url):
            print(f"❌ Error: Invalid YouTube URL: {args.url}", file=sys.stderr)
            sys.exit(1)

        # Get video info
        print("📋 Getting video information...")
        _print_video_info(service.get_video_info(args.url))

        if args.info_only:
            print("ℹ️ Info-only mode. Exiting without transcription.")
            return

        # Transcribe
        print("🎵 Starting MP3 download and transcription...")
        print("⏳ This may take a few minutes depending on video length...")
        result = service.transcribe_youtube_video(args.url, keep_mp3=args.keep_mp3)

        if not result.success:
            print(f"❌ Transcription failed: {result.error}", file=sys.stderr)
            sys.exit(1)

        print("✅ Transcription completed successfully!")
        print(f"📊 Processing time: {result.processing_time:.2f} seconds")
        print(f"💾 Audio file size: {result.audio_file_size_mb:.2f} MB")
        print(f"🧩 Chunks processed: {result.chunks_processed}")
        print(f"📝 Transcript length: {len(result.transcript or '')} characters")
        if args.keep_mp3:
            print("💾 MP3 file was preserved")

        # Output transcript
        _emit_transcript(result.transcript, args.output)

    except KeyboardInterrupt:
        print("\n⛔ Operation cancelled by user", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any failure as a one-line message on
        # stderr rather than a traceback. SystemExit from the sys.exit calls
        # above is not an Exception subclass, so it passes through untouched.
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()