forked from Ahuanghaifeng/python3-torrent
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml_parser.py
More file actions
28 lines (24 loc) · 940 Bytes
/
html_parser.py
File metadata and controls
28 lines (24 loc) · 940 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from bs4 import BeautifulSoup
import re
import urllib.parse
class HtmlParser(object):
    """Extract torrent titles and their download links from an HTML page."""

    # Patterns matched against the ``href`` attribute of ``<a>`` tags.
    # Compiled once here instead of on every ``_get_data`` call.
    _DETAIL_HREF = re.compile(r"/detail")
    _MAGNET_HREF = re.compile(r"magnet")
    _THUNDER_HREF = re.compile(r"thunder")

    def parserTwo(self, html):
        """Parse a torrent listing page.

        Args:
            html: The page markup, or ``None``.

        Returns:
            ``None`` when ``html`` is ``None``; otherwise the list of dicts
            produced by ``_get_data``.
        """
        if html is None:
            return None
        if isinstance(html, str):
            # Already-decoded text: BeautifulSoup ignores ``from_encoding``
            # for str input and only emits a warning, so leave it out.
            soup = BeautifulSoup(html, 'html.parser')
        else:
            soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
        return self._get_data(soup)

    def _get_data(self, soup):
        """Bundle each torrent title with its magnet and Thunder links.

        Anchors are paired by position: the i-th ``/detail`` anchor supplies
        the title, the i-th ``magnet`` anchor the ``'cl'`` value and the i-th
        ``thunder`` anchor the ``'xl'`` value.
        NOTE(review): this assumes the page lists the three kinds of anchors
        in the same order -- confirm against the target site.

        Args:
            soup: Parsed document exposing ``find_all``.

        Returns:
            A list of dicts with keys ``'title'``, ``'cl'`` and ``'xl'``,
            one per ``/detail`` anchor. ``'cl'`` / ``'xl'`` are ``None`` when
            the page has no magnet / thunder anchor at that position.
        """
        title_links = soup.find_all('a', href=self._DETAIL_HREF)
        magnet_links = soup.find_all('a', href=self._MAGNET_HREF)
        thunder_links = soup.find_all('a', href=self._THUNDER_HREF)

        res_datas = []
        for index, title_link in enumerate(title_links):
            # The link lists may be shorter than the title list; fall back
            # to None rather than raising IndexError.
            magnet = (
                magnet_links[index].get('href')
                if index < len(magnet_links) else None
            )
            thunder = (
                thunder_links[index].get('href')
                if index < len(thunder_links) else None
            )
            res_datas.append({
                'title': title_link.get_text(),
                'cl': magnet,
                'xl': thunder,
            })
        return res_datas