-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscrape.py
More file actions
26 lines (20 loc) · 684 Bytes
/
scrape.py
File metadata and controls
26 lines (20 loc) · 684 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import re
from collections import OrderedDict
from typing import Optional

from bs4 import BeautifulSoup
from requests import get
def get_torrents(url: str, *, timeout: float = 30.0) -> Optional[OrderedDict]:
    """Collect the magnet links on a page, grouped by their parent element.

    The page at *url* is fetched and parsed, every tag whose ``href``
    matches ``magnet`` is located, and the links are grouped by the element
    that directly contains such a tag.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        An ``OrderedDict`` mapping a string index to the list of magnet
        ``href`` values found anywhere beneath one parent element. The key
        is the position (in document order) of the first magnet tag that
        had that parent, so keys may have gaps when several magnet tags
        share an equal parent. ``None`` is returned when the response status
        is not OK or the page contains no magnet links.

    Raises:
        Any exception raised by ``requests.get`` (connection failure,
        timeout, ...) is propagated unchanged.
    """
    response = get(url, timeout=timeout)
    if not response.ok:
        return None

    # Compile once; the same pattern is used for both searches below.
    magnet_href = re.compile("magnet")

    soup = BeautifulSoup(response.text, "lxml")
    candidate_parents = [tag.parent for tag in soup.find_all(href=magnet_href)]
    if not candidate_parents:
        return None

    # Keep only the first occurrence of each parent. Membership is decided
    # by the tags' own equality (not identity), exactly as `in` evaluates it.
    unique_parents = OrderedDict()
    for index, parent in enumerate(candidate_parents):
        if parent not in unique_parents.values():
            unique_parents[str(index)] = parent

    # Replace each parent element with the magnet hrefs found beneath it.
    return OrderedDict(
        (key, [tag["href"] for tag in parent.find_all(href=magnet_href)])
        for key, parent in unique_parents.items()
    )