-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcrawler.py
More file actions
71 lines (53 loc) · 2.51 KB
/
crawler.py
File metadata and controls
71 lines (53 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import os
import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from flask import Flask, session, render_template, request, Response, render_template_string, g
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# The secret key is 120 fresh random bytes drawn at import time, so it
# differs on every process start.
app.config.update(SECRET_KEY=os.urandom(120))
# Matches the assignment that precedes the embedded search-results JSON.
_YT_INITIAL_DATA_RE = re.compile(r'ytInitialData\s*=\s*')


def _extract_initial_data(script_text):
    """Return the ``ytInitialData`` object found in *script_text*, or None."""
    marker = _YT_INITIAL_DATA_RE.search(script_text)
    if marker is None:
        return None
    try:
        # raw_decode parses exactly one JSON value starting at the given
        # offset and ignores whatever follows, so ';' characters inside the
        # payload cannot truncate it.
        data, _ = json.JSONDecoder().raw_decode(script_text, marker.end())
    except ValueError:
        return None
    return data if isinstance(data, dict) else None


def _dig(node, *path):
    """Walk nested dicts/lists along *path*; return None if a step is missing."""
    for step in path:
        if isinstance(node, dict):
            node = node.get(step)
        elif (isinstance(node, list) and isinstance(step, int)
                and -len(node) <= step < len(node)):
            node = node[step]
        else:
            return None
    return node


def _first_video(initial_data):
    """Return ``(video_id, title)`` of the first video result, or None."""
    sections = _dig(
        initial_data,
        'contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
        'sectionListRenderer', 'contents',
    )
    if not isinstance(sections, list):
        return None
    for section in sections:
        items = _dig(section, 'itemSectionRenderer', 'contents')
        if not isinstance(items, list):
            continue
        for item in items:
            # Items that are not videos carry no 'videoRenderer' key and
            # are skipped instead of aborting the whole lookup.
            video = _dig(item, 'videoRenderer')
            video_id = _dig(video, 'videoId')
            title = _dig(video, 'title', 'runs', 0, 'text')
            if video_id is not None and title is not None:
                return video_id, title
    return None


@app.route('/searchQuery')
def search_by_query():
    """Search YouTube for the ``q`` query parameter and report the first video.

    Returns:
        A JSON string with the keys ``videoID``, ``videoTitle`` and
        ``videoFounded``. When no video can be determined the values are
        ``0``, ``''`` and ``False``. A missing ``q`` parameter yields that
        same payload with status 400; a failed or timed-out upstream
        request yields it with status 502.
    """
    not_found = {'videoID': 0, 'videoTitle': '', 'videoFounded': False}

    query = request.args.get('q')
    if query is None:
        return json.dumps(not_found), 400

    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'}
    try:
        # Passing the term through ``params`` URL-encodes it, so characters
        # such as '&', '#' or spaces cannot corrupt the request URL.
        content = requests.get(
            'https://www.youtube.com/results',
            params={'search_query': query},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException:
        return json.dumps(not_found), 502

    soup = BeautifulSoup(content.text, 'html.parser')
    script = soup.find('script', string=_YT_INITIAL_DATA_RE)
    initial_data = None
    if script is not None and script.string is not None:
        initial_data = _extract_initial_data(str(script.string))

    # Explicit lookups replace the former bare try/except used as control
    # flow (the original TODO asked for exactly this).
    match = _first_video(initial_data)
    if match is None:
        return json.dumps(not_found)

    video_id, video_name = match
    return json.dumps({
        'videoID': video_id,
        'videoTitle': video_name,
        'videoFounded': True,
    })
# Registrable domains this endpoint is willing to fetch from.
_ALLOWED_VIDEO_DOMAINS = ('youtube.com', 'youtu.be')


def _is_allowed_video_url(url):
    """Return True when *url* is an http(s) URL on an allowed video domain."""
    if not url:
        return False
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    if parts.scheme not in ('http', 'https'):
        return False
    host = (parts.hostname or '').lower()
    return any(
        host == domain or host.endswith('.' + domain)
        for domain in _ALLOWED_VIDEO_DOMAINS
    )


@app.route('/searchURL')
def search_by_url():
    """Fetch the page given in the ``url`` query parameter and report its video.

    Returns:
        A JSON string with the keys ``videoID``, ``videoTitle`` and
        ``videoFounded``. When the video cannot be determined the values
        are ``0``, ``''`` and ``False``. A missing or disallowed ``url``
        yields that same payload with status 400; a failed or timed-out
        upstream request yields it with status 502.
    """
    not_found = {'videoID': 0, 'videoTitle': '', 'videoFounded': False}

    search = request.args.get('url')
    # SECURITY: the URL is caller-controlled and fetched server-side, so it
    # is limited to known video hosts. Redirects are still followed, so
    # this narrows rather than fully removes the SSRF surface.
    if not _is_allowed_video_url(search):
        return json.dumps(not_found), 400

    try:
        content = requests.get(search, timeout=10)
    except requests.RequestException:
        return json.dumps(not_found), 502

    # NOTE(review): this marker is the Portuguese "video unavailable" text,
    # so it only matches when the page is served in Portuguese - confirm.
    if 'Vídeo indisponível' in content.text:
        return json.dumps(not_found)

    soup = BeautifulSoup(content.text, 'html.parser')
    name_tag = soup.find(itemprop='name')
    id_tag = soup.find(itemprop='videoId')
    video_name = name_tag.get('content') if name_tag is not None else None
    video_id = id_tag.get('content') if id_tag is not None else None
    # Pages without both metadata tags are reported as "not found" rather
    # than failing on a None subscript.
    if video_name is None or video_id is None:
        return json.dumps(not_found)

    return json.dumps({
        'videoID': video_id,
        'videoTitle': video_name,
        'videoFounded': True,
    })
if __name__ == '__main__':
    # Start Flask's built-in server on the loopback interface only, with
    # debug mode off, when this file is executed as a script.
    app.run(
        debug=False,
        port=4000,
        host='127.0.0.1',
    )