forked from alexeygrigorev/loom-transcript-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug.py
More file actions
120 lines (96 loc) · 4.33 KB
/
debug.py
File metadata and controls
120 lines (96 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import subprocess
import time
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
# File paths
# input_file holds one Loom video ID per line; processed_file collects the IDs
# that have been handled. Both are read/written by the processing loop later
# in this script.
input_file = 'loom-videos.txt'
processed_file = 'loom-videos-processed.txt'

# Chrome paths (standard Windows install location and the per-user profile dir)
chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
user_data_dir = os.path.join(
    os.path.expanduser('~'), "AppData", "Local", "Google", "Chrome", "User Data"
)
print(f"Chrome path: {chrome_path}")
print(f"User data directory: {user_data_dir}")

# Launch Chrome with remote debugging port.
# The argument list is handed to Popen directly (no shell), so paths that
# contain spaces need no manual quoting and nothing is interpreted by cmd.exe.
chrome_args = [
    chrome_path,
    "--remote-debugging-port=9222",
    f"--user-data-dir={user_data_dir}",
]
# `cmd` is kept only as a human-readable rendering of the launch command for
# the log line below; it is not what gets executed.
cmd = f'"{chrome_path}" --remote-debugging-port=9222 --user-data-dir="{user_data_dir}"'
print(f"Launching Chrome with command: {cmd}")
# Keep a reference to the process handle; Chrome is intentionally left running
# in the background and is not waited on.
chrome_process = subprocess.Popen(chrome_args)

print("Waiting for Chrome to launch...")
time.sleep(5)  # Give Chrome some time to start

# The user may need to authenticate in the freshly opened window before the
# automated part of the script takes over.
print("Chrome has been launched. Please log in to Loom manually if needed.")
input("Press Enter when you're logged in and ready to proceed...")
# Set up Chrome options for Selenium.
# "debuggerAddress" makes the driver attach to the Chrome instance that is
# listening on the remote-debugging port instead of starting a new browser.
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

# Initialize the WebDriver
print("Connecting to Chrome...")
driver = webdriver.Chrome(options=chrome_options)

# Sanity check: reading the URL and title forces a round trip to the browser,
# so a broken attachment is detected here rather than mid-run.
print("Connected to Chrome. Verifying connection...")
try:
    print(f"Current URL: {driver.current_url}")
    print(f"Page title: {driver.title}")
except Exception as e:
    print(f"Error verifying connection: {str(e)}")
    # Raise SystemExit directly instead of calling the `exit` helper: that
    # helper is injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. `python -S` or frozen builds).
    raise SystemExit(1)
# Main run: for every video ID listed in input_file, open its Loom share page
# in the attached browser and trigger "More actions" -> "Download captions".
# Successfully handled IDs are appended to processed_file and removed from
# input_file, so an interrupted run can be resumed with the remaining IDs.
try:
    # Navigate to Loom
    print("Navigating to Loom...")
    driver.get("https://www.loom.com")
    time.sleep(5)
    print(f"Current URL after navigation: {driver.current_url}")
    print(f"Page title after navigation: {driver.title}")

    # Now proceed with the Loom video processing.
    # Read the IDs under `with` so the handle is closed deterministically
    # before the loop below reopens the same file for writing.
    with open(input_file, 'r') as f:
        video_ids = [line.strip() for line in f if line.strip()]

    for video_id in video_ids:
        try:
            url = f"https://www.loom.com/share/{video_id}"
            print(f"\nOpening URL: {url}")
            driver.get(url)

            print("Waiting for page to load...")
            time.sleep(10)
            print("Current page title:", driver.title)

            print("Looking for 'More actions' button...")
            more_actions_button = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, "toggleActions"))
            )
            print("'More actions' button found. Clicking...")
            # Scroll the element into the viewport before clicking it.
            driver.execute_script("arguments[0].scrollIntoView();", more_actions_button)
            more_actions_button.click()
            time.sleep(2)  # let the actions menu open

            print("Looking for 'Download captions' option...")
            download_captions_option = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[contains(text(), 'Download captions')]")
                )
            )
            print("'Download captions' option found. Clicking...")
            driver.execute_script("arguments[0].scrollIntoView();", download_captions_option)
            download_captions_option.click()

            print("Waiting for download to start...")
            time.sleep(5)

            print(f"Processed video: {video_id}")

            # Record the ID as done ...
            with open(processed_file, 'a') as f:
                f.write(f"{video_id}\n")

            # ... and drop it from the pending list by rewriting input_file
            # without any line that matches this ID.
            with open(input_file, 'r') as f:
                lines = f.readlines()
            with open(input_file, 'w') as f:
                f.writelines(line for line in lines if line.strip() != video_id)

        except TimeoutException:
            # One of the WebDriverWait lookups above did not find its element
            # within 20 seconds; skip this video and keep going.
            print(f"Timeout occurred while processing video {video_id}")
        except Exception as e:
            # Best-effort batch: a failure on one video must not stop the rest.
            print(f"Error processing video {video_id}: {str(e)}")

        print("Waiting before next video...")
        time.sleep(5)

except Exception as e:
    # Failures outside the per-video handling (initial navigation, reading
    # input_file) end the run but still fall through to the cleanup below.
    print(f"An error occurred: {str(e)}")

finally:
    # Pause so the user can inspect the browser state before the driver
    # session is torn down.
    input("Press Enter to close the browser...")
    try:
        driver.quit()
    except WebDriverException:
        print("WebDriver was already closed.")
    except Exception as e:
        print(f"Error while closing the browser: {str(e)}")

print("Script completed. You can now close the Chrome window manually.")