-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathTag_extractor.py
More file actions
48 lines (42 loc) · 1.74 KB
/
Tag_extractor.py
File metadata and controls
48 lines (42 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from bs4 import BeautifulSoup
import json
import requests as req
import urllib.parse as par
# Banner and short usage blurb, printed once when the script starts.
print("....YOUTUBE VIDEOS TAG EXTRACTOR....\n")
print("Description:\n==>With the help of this tool you can easily extract tags from popular videos on youtube.\n==> These tags are very useful for YouTube Search Engine Optimization.\n")

# Ask for the search keyword and keep asking until it contains something
# other than whitespace. Comparing against '' and ' ' only would let inputs
# such as two spaces or a tab through, producing an empty search query.
keyword = input("Enter Your Search Keyword:")
while not keyword.strip():
    print("Invalid input detected")
    keyword = input("Enter a Keyword:")
def walker(keyword, timeout=10):
    """Yield the video links found on YouTube's results page for *keyword*.

    Args:
        keyword: Search phrase; it is URL-encoded into the ``search_query``
            query parameter.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up.

    Yields:
        The ``href`` value of every matching ``<a>`` element whose link
        contains ``/watch``.
    """
    search = {'search_query': keyword}
    url = 'https://www.youtube.com/results?{}'.format(par.urlencode(search))
    print("Url Generated: "+url)
    # Without a timeout requests waits indefinitely on an unresponsive server.
    content = req.get(url, timeout=timeout)
    soup = BeautifulSoup(content.content, 'html.parser')
    a_tags = soup.find_all(
        "a",
        attrs={"class": "yt-uix-sessionlink spf-link", "aria-hidden": "true"},
    )
    for tag in a_tags:
        # An anchor without an href is skipped instead of raising KeyError.
        href = tag.get('href', '')
        if "/watch" in href:
            yield href
def tag_extractor(url, timeout=10):
    """Download a page and return the ``keywords`` list found in its scripts.

    Every ``<script>`` element is searched for a ``keywords`` key whose value
    is a JSON array; the first array that decodes successfully is returned.
    An occurrence that cannot be decoded is skipped rather than aborting.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up.

    Returns:
        The decoded ``keywords`` list, or ``None`` when no script contains a
        decodable one.
    """
    content = req.get(url, timeout=timeout)
    soup = BeautifulSoup(content.content, 'html.parser')
    for script in soup.find_all('script'):
        tags = _keywords_from_text(str(script))
        if tags is not None:
            return tags
    return None


def _keywords_from_text(text):
    """Return the first JSON array that follows a ``keywords`` key in *text*, else None."""
    # strict=False tolerates raw control characters inside string values.
    decoder = json.JSONDecoder(strict=False)
    size = len(text)
    pos = text.find('keywords')
    while pos != -1:
        after = pos + len('keywords')
        start = after
        # Step over the key's closing quote, the colon and any
        # backslashes/whitespace that sit between the key and its value.
        while start < size and text[start] in '\\": \t\r\n':
            start += 1
        separator = text[after:start]
        if start < size and text[start] == '[' and ':' in separator:
            try:
                if '\\' in separator:
                    # The key's quote was backslash-escaped, so the array is
                    # itself inside a string literal and needs unescaping.
                    value = _decode_escaped_array(text, start, decoder)
                else:
                    value, _ = decoder.raw_decode(text, start)
            except ValueError:
                # Not valid JSON at this occurrence; try the next one.
                value = None
            if isinstance(value, list):
                return value
        pos = text.find('keywords', after)
    return None


def _decode_escaped_array(text, start, decoder):
    """Decode the array at *start* when it is embedded in a JSON string literal."""
    # Find the unescaped double quote that closes the enclosing literal;
    # a backslash always consumes the character that follows it.
    end = start
    size = len(text)
    while end < size and text[end] != '"':
        end += 2 if text[end] == '\\' else 1
    if end >= size:
        raise ValueError('unterminated string literal around keywords array')
    # Undo one level of string escaping, then decode the array that the
    # unescaped text starts with (anything after the array is ignored).
    inner = decoder.decode('"' + text[start:end] + '"')
    value, _ = decoder.raw_decode(inner)
    return value
# Gather the tags of every video returned for the keyword into one flat list.
final = []
for link in walker(keyword):
    url = 'https://www.youtube.com' + link
    out = tag_extractor(url)
    if out is None:
        # No tag list was found for this video; move on to the next link.
        continue
    print(out)
    final.extend(out)

# Write the combined list to sample.txt as indented JSON.
with open('sample.txt', 'w') as file:
    file.write(json.dumps(final, indent=2))