-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRelatedPhrase.py
More file actions
36 lines (22 loc) · 964 Bytes
/
RelatedPhrase.py
File metadata and controls
36 lines (22 loc) · 964 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import PDF_Parser
import urllib.request
def Relatable(path,word):
    """Print words related to ``word`` and run the PDF lookup on each of them.

    The function searches the web for ``word`` plus the term "wikipedia",
    downloads the first result, and collects the text of anchor tags on that
    page that look like single plain words.  Each collected word is printed
    and then passed to ``PDF_Parser.mainpharse(path, related_word)``.

    Args:
        path: Forwarded unchanged as the first argument of
            ``PDF_Parser.mainpharse`` (presumably the PDF location - confirm
            against that module).
        word: The phrase to find related words for.  Link texts containing
            this phrase are skipped.

    Returns:
        None.  Results are reported through ``print`` and through the calls
        to ``PDF_Parser.mainpharse``.
    """
    # The separating space matters: without it the phrase and "wikipedia"
    # fuse into a single search token (e.g. "pythonwikipedia").
    query = word + " wikipedia"

    # Only the top hit is ever used, so take the first URL instead of
    # collecting all ten results into a list.
    first_url = next(iter(search(query, tld="co.in", num=10, stop=10, pause=2)), None)
    if first_url is None:
        # Previously this case crashed with an IndexError on results[0].
        print("\n\nNo search results found for : " + word + "\n")
        return

    # The context manager closes the HTTP response once the page is parsed.
    with urllib.request.urlopen(first_url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    # A list keeps the words in page order so the follow-up lookups run in a
    # deterministic sequence; membership checks keep the entries unique.
    related_words = []
    skipped_prefixes = ('Jump', 'edit', '[')

    print("\n\nRelated words to your Phrase are : \n")
    for link in soup.find_all("a", href=True):
        text = link.text
        if (
            text.isalpha()
            and word not in text
            and not text.startswith(skipped_prefixes)
            and text not in related_words
        ):
            print(text)
            related_words.append(text)
        # Same cut-off as before: stop once six distinct words are collected.
        if len(related_words) > 5:
            break

    for related_word in related_words:
        PDF_Parser.mainpharse(path, related_word)