YouTube-Tag-Extractor/Tag_extractor.py at master · Amansinghtech/YouTube-Tag-Extractor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from bs4 import BeautifulSoup
import json
import requests as req
import urllib.parse as par
print("....YOUTUBE VIDEOS TAG EXTRACTOR....\n")
print("Description:\n==>With the help of this tool you can easily extract tags from popular videos on youtube.\n==> These tags are very useful for YouTube Search Engine Optimization.\n")

# Ask for the search keyword and keep asking until it contains at least one
# non-whitespace character. Checking with strip() also rejects inputs made of
# several spaces or tabs, which a comparison against '' and ' ' lets through.
# The value is stored exactly as typed; only the validation uses strip().
keyword = input("Enter Your Search Keyword:")
while not keyword.strip():
    print("Invalid input detected")
    keyword = input("Enter a Keyword:")

def walker(keyword, timeout=10):
    """Yield the ``/watch`` links found on the YouTube results page for *keyword*.

    Args:
        keyword: Search phrase; it is URL-encoded into the ``search_query``
            query parameter.
        timeout: Seconds passed to ``requests`` as the request timeout, so a
            stalled connection raises instead of blocking forever.

    Yields:
        The ``href`` value of every ``<a>`` element whose class is
        ``yt-uix-sessionlink spf-link`` and whose ``aria-hidden`` is ``true``,
        provided that value contains ``/watch``.
    """
    query = par.urlencode({'search_query': keyword})
    url = 'https://www.youtube.com/results?{}'.format(query)
    print("Url Generated: "+url)
    response = req.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    anchors = soup.find_all(
        "a",
        attrs={"class": "yt-uix-sessionlink spf-link", "aria-hidden": "true"},
    )
    for anchor in anchors:
        # Use .get() so an anchor without an href is skipped rather than
        # raising KeyError and ending the generator.
        href = anchor.get('href', '')
        if "/watch" in href:
            yield href

def _parse_keywords(text):
    """Return the first JSON list that follows a ``keywords`` key in *text*.

    Two spellings of the key are recognised: plain JSON (``keywords":[...]``)
    and JSON embedded in a quoted string (``keywords\\":[\\"a\\",...]``).
    Occurrences of ``keywords`` that are not followed by a parseable list are
    skipped. Returns ``None`` when no occurrence yields a list.
    """
    marker = 'keywords'
    decoder = json.JSONDecoder()
    pos = text.find(marker)
    while pos != -1:
        after = pos + len(marker)
        value = None
        if text.startswith('":', after):
            start = after + 2
            # raw_decode does not skip leading whitespace, so do it here.
            while start < len(text) and text[start].isspace():
                start += 1
            try:
                # A real JSON parse finds the true end of the array, so tags
                # containing ']' or ':' are returned intact.
                value, _ = decoder.raw_decode(text, start)
            except ValueError:
                value = None
        elif text.startswith('\\":', after):
            start = after + 3
            stop = text.find(']', start)
            if stop != -1:
                # The array sits inside an escaped string: drop the escaping
                # backslashes so the quotes become plain JSON quotes. Colons
                # inside tags are left untouched.
                candidate = text[start:stop + 1].replace('\\', '')
                try:
                    value = json.loads(candidate)
                except ValueError:
                    value = None
        if isinstance(value, list):
            return value
        pos = text.find(marker, after)
    return None


def tag_extractor(url):
    """Download the page at *url* and return its ``keywords`` list.

    Every ``<script>`` element of the page is searched in document order for a
    ``keywords`` key followed by a JSON array.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The decoded list from the first script that contains a parseable
        ``keywords`` array, or ``None`` when no script does.
    """
    # Timeout so a stalled connection raises instead of blocking forever.
    response = req.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')
    for script in soup.find_all('script'):
        tags = _parse_keywords(str(script))
        if tags is not None:
            return tags
    return None

# Accumulates the tags of every video found for the keyword, in the order the
# videos are visited.
final = []

for link in walker(keyword):
    # walker yields site-relative links; prefix the host to get a full URL.
    url = f'https://www.youtube.com{link}'
    out = tag_extractor(url)
    if out is None:
        # Nothing extracted for this video; move on to the next one.
        continue
    print(out)
    final.extend(out)

# Write all collected tags to sample.txt as an indented JSON array.
with open('sample.txt', 'w') as file:
    file.write(json.dumps(final, indent=2))