Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ COPY control_server.py /opt/control_server.py
COPY site_runner.py /opt/site_runner.py
RUN chmod +x /opt/websyn_start.sh

EXPOSE 8101 40000-40014
EXPOSE 8101 40000-40015

CMD ["/opt/websyn_start.sh"]
2 changes: 1 addition & 1 deletion control_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
'allrecipes', 'amazon', 'apple', 'arxiv', 'bbc_news', 'booking',
'github', 'google_flights', 'google_map', 'google_search',
'huggingface', 'wolfram_alpha', 'cambridge_dictionary',
'coursera', 'espn',
'coursera', 'espn', 'rotten_tomatoes',
]
BASE_PORT = 40000
WEBSYN_DIR = '/opt/WebSyn'
Expand Down
3 changes: 3 additions & 0 deletions sites/rotten_tomatoes/_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Per-site health probe (optional, called by control_server)."""
def health():
    """Return the static health payload for the rotten_tomatoes site.

    Returns:
        A new dict with ``ok`` set to ``True`` and ``site`` set to the
        site's name.
    """
    status = dict(ok=True, site="rotten_tomatoes")
    return status
742 changes: 742 additions & 0 deletions sites/rotten_tomatoes/app.py

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions sites/rotten_tomatoes/download_people.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""Download celebrity photos from RT celebrity pages."""
import json, subprocess, os, re, sys

PEOPLE_DIR = "static/images/people"
os.makedirs(PEOPLE_DIR, exist_ok=True)

# seed_data is imported from the current working directory.
sys.path.insert(0, '.')
import seed_data

# Files at or below this size are treated as failed/placeholder downloads.
MIN_PHOTO_BYTES = 1000

# Find celebrity headshot - look for celeb image patterns
# Pattern 1: ems-prd-assets/celebrities/ (base64: ZW1zLXByZC1hc3NldHMvY2VsZWJyaXRpZXMv)
# Pattern 2: prd-ems-assets/celebrities/ (base64: cHJkLWVtcy1hc3NldHMvY2VsZWJyaXRpZXMv)
CELEB_URL_RE = re.compile(
    r'https://resizing\.flixster\.com/[^"]+(?:Y2VsZWJyaXRpZXMv|Y2VsZWJyaXRpZX)[^"]*'
)


def _has_photo(path):
    """Return True when *path* exists and is larger than MIN_PHOTO_BYTES."""
    return os.path.exists(path) and os.path.getsize(path) > MIN_PHOTO_BYTES


def _discard(path):
    """Delete *path* if present so a later run retries the download."""
    if os.path.exists(path):
        os.remove(path)


total = len(seed_data.PERSONS)
done = 0
failed = []

for p in seed_data.PERSONS:
    slug = p['slug']
    outpath = f"{PEOPLE_DIR}/{slug}.jpg"
    if _has_photo(outpath):
        # Already downloaded on a previous run.
        done += 1
        continue

    try:
        page = subprocess.run(
            ['curl', '-sL', '--connect-timeout', '5', '--max-time', '10',
             f'https://www.rottentomatoes.com/celebrity/{slug}'],
            capture_output=True, text=True, timeout=15
        )
        celeb_urls = CELEB_URL_RE.findall(page.stdout)
        if not celeb_urls:
            failed.append(slug)
            continue

        photo_url = celeb_urls[-1]  # Last one is usually the main headshot
        # --fail makes curl exit non-zero on HTTP errors instead of saving
        # the server's error page as the image.
        dl_result = subprocess.run(
            ['curl', '-sL', '--fail', '-o', outpath,
             '--connect-timeout', '5', '--max-time', '15', photo_url],
            capture_output=True, timeout=20
        )
        downloaded = dl_result.returncode == 0 and _has_photo(outpath)
    except (subprocess.SubprocessError, OSError, UnicodeError):
        # Timeout, curl missing/unrunnable, or undecodable page output:
        # best-effort script, so record the failure and move on.
        downloaded = False

    if not downloaded:
        failed.append(slug)
        # Remove partial output; otherwise a truncated file above the size
        # threshold would be counted as done on the next run.
        _discard(outpath)
        continue

    done += 1
    # Report only when the counter actually advances, so a run of failures
    # does not repeat the same progress line.
    if done % 20 == 0:
        print(f"Downloaded {done}/{total}...", flush=True)

print(f"Downloaded: {done}/{total}")
if failed:
    print(f"Failed ({len(failed)}): {', '.join(failed[:30])}")
42 changes: 42 additions & 0 deletions sites/rotten_tomatoes/download_posters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""Download poster images from flixster CDN URLs."""
import json, subprocess, os, sys

POSTER_DIR = "static/images/posters"
os.makedirs(POSTER_DIR, exist_ok=True)

# Read the movie list; only entries with a 'poster_url' are downloaded.
with open('scraped_data/movies.json', encoding='utf-8') as f:
    data = json.load(f)

movies = data['movies']
total = sum(1 for m in movies if m.get('poster_url'))
done = 0
failed = []

for m in movies:
    url = m.get('poster_url')
    if not url:
        continue
    slug = m['slug']
    outpath = f"{POSTER_DIR}/{slug}.jpg"
    # Files of 1000 bytes or less are treated as failed downloads and retried.
    if os.path.exists(outpath) and os.path.getsize(outpath) > 1000:
        done += 1
        continue

    try:
        # --fail makes curl exit non-zero on HTTP errors instead of saving
        # the server's error page as the poster.
        result = subprocess.run(
            ['curl', '-sL', '--fail', '-o', outpath,
             '--connect-timeout', '10', '--max-time', '30', url],
            capture_output=True, timeout=35
        )
        downloaded = (
            result.returncode == 0
            and os.path.exists(outpath)
            and os.path.getsize(outpath) > 1000
        )
    except (subprocess.SubprocessError, OSError):
        # A timeout or an unrunnable curl must not abort the whole batch;
        # record the failure and continue with the next movie.
        downloaded = False

    if downloaded:
        done += 1
        if done % 20 == 0:
            print(f"Downloaded {done}/{total}...")
    else:
        failed.append(slug)
        # Remove partial output so a later run retries this poster.
        if os.path.exists(outpath):
            os.remove(outpath)

print(f"Downloaded: {done}/{total}")
if failed:
    print(f"Failed ({len(failed)}): {failed[:10]}...")
1 change: 1 addition & 0 deletions sites/rotten_tomatoes/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Flask
Loading