-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_links.py
More file actions
39 lines (32 loc) · 1.2 KB
/
extract_links.py
File metadata and controls
39 lines (32 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
"""Extract all links from a web page using Plasmate's SOM."""
import subprocess
import json
import sys
DEFAULT_URL = "https://news.ycombinator.com"
JSON_FLAG = "--json"
OUTPUT_FILE = "links.json"


def extract_links(som):
    """Collect link elements from a parsed SOM document.

    Walks ``som["regions"]`` and, inside each region, ``region["elements"]``.
    An element is kept when its ``"role"`` is ``"link"`` and it has a truthy
    ``"href"``.

    Args:
        som: Mapping decoded from the JSON printed by ``plasmate fetch``.
            Only the keys named above are read; the rest of the schema is
            not assumed here.

    Returns:
        A list of dicts with keys ``"text"`` (whitespace-stripped, ``""``
        when missing or null), ``"href"`` and ``"region"`` (the region's
        ``"role"``, or ``"unknown"`` when the region has none).
    """
    links = []
    # ``or []`` also covers keys that are present but null in the JSON.
    for region in som.get("regions") or []:
        region_role = region.get("role", "unknown")
        for el in region.get("elements") or []:
            if el.get("role") == "link" and el.get("href"):
                links.append({
                    # A null "text" would make .get(..., "") return None,
                    # so normalise it before stripping.
                    "text": (el.get("text") or "").strip(),
                    "href": el["href"],
                    "region": region_role,
                })
    return links


def main(argv=None):
    """Fetch a page with ``plasmate fetch``, print its links, optionally save them.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument that is not ``--json`` is
            used as the URL; ``--json`` anywhere also writes ``links.json``.

    Returns:
        Process exit status: 0 on success, 1 when plasmate cannot be run,
        exits non-zero, or prints something that is not a JSON object.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    save_json = JSON_FLAG in args
    # Keep the flag out of the positional arguments so that running with
    # only "--json" still fetches the default URL instead of "--json".
    positional = [arg for arg in args if arg != JSON_FLAG]
    url = positional[0] if positional else DEFAULT_URL

    try:
        result = subprocess.run(
            ["plasmate", "fetch", url], capture_output=True, text=True
        )
    except FileNotFoundError:
        print("Error: 'plasmate' executable not found", file=sys.stderr)
        return 1
    if result.returncode != 0:
        print(f"Error: {result.stderr}", file=sys.stderr)
        return 1

    try:
        som = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error: plasmate output is not valid JSON: {exc}", file=sys.stderr)
        return 1
    if not isinstance(som, dict):
        print("Error: plasmate output is not a JSON object", file=sys.stderr)
        return 1

    print(f"Links from: {som.get('title', url)}\n")
    links = extract_links(som)
    print(f"Found {len(links)} links:\n")
    for i, link in enumerate(links, 1):
        text = link["text"][:60] if link["text"] else "(no text)"
        print(f" {i:3d}. [{link['region']}] {text}")
        print(f" → {link['href']}")

    # Optionally save as JSON
    if save_json:
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(links, f, indent=2)
        print(f"\nSaved to {OUTPUT_FILE}")
    return 0


if __name__ == "__main__":
    sys.exit(main())