-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconcept_parser.py
More file actions
67 lines (51 loc) · 1.55 KB
/
concept_parser.py
File metadata and controls
67 lines (51 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import requests
import re
def abstract_extractor(link_dbpedia):
    """Fetch the English abstract of a DBpedia resource.

    The resource link is turned into its RDF/XML data URL (the first
    ``/resource/`` path segment becomes ``/data/`` and ``.xml`` is
    appended), the document is downloaded, and the first line of the
    ``<dbo:abstract xml:lang="en">`` element is returned.

    Args:
        link_dbpedia: DBpedia resource URL, e.g.
            ``http://dbpedia.org/resource/Natural_resource``.

    Returns:
        The first line of the English abstract, or the fallback string
        ``No abstract extracted`` followed by a newline when the downloaded
        document contains no English abstract element.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    # Rewrite only the path segment, and only once, so that resource names
    # which themselves contain the word "resource" are left intact.
    data_url = link_dbpedia.replace("/resource/", "/data/", 1) + ".xml"
    print(data_url)

    # A timeout keeps one unresponsive lookup from hanging the whole run.
    response = requests.get(data_url, timeout=30)

    # Decode the payload instead of searching the repr() of the bytes:
    # str(bytes) yields "b'...'" with non-ASCII characters and newlines
    # turned into backslash escapes.
    xml_text = response.content.decode("utf-8", errors="replace")

    # Non-greedy so the match stops at the first closing tag rather than at
    # the last abstract in the document; DOTALL because the abstract text
    # may span several lines.
    match = re.search(
        r'<dbo:abstract xml:lang="en">(.*?)</dbo:abstract>',
        xml_text,
        re.DOTALL,
    )
    if match is None:
        return "No abstract extracted\n"

    # Only the first line of the abstract is reported.
    return match.group(1).split("\n")[0]
def concept_parser(input_json):
    """Map each concept's text to the abstract fetched for its DBpedia link.

    Args:
        input_json: JSON string containing a ``concepts`` list. Every entry
            must provide a ``text`` value and a ``dbpedia`` resource link.

    Returns:
        A dict whose keys are the concept texts and whose values are the
        results of ``abstract_extractor`` for the corresponding links.
        When two concepts share the same text, the later one wins.

    Raises:
        ValueError: if ``input_json`` is not valid JSON.
        KeyError: if ``concepts``, ``dbpedia`` or ``text`` is missing.
    """
    concepts = json.loads(input_json)['concepts']

    dbpedia_links = [concept['dbpedia'] for concept in concepts]
    concept_texts = [concept['text'] for concept in concepts]
    print(concept_texts)

    # One lookup per concept, in input order.
    abstracts = [abstract_extractor(link) for link in dbpedia_links]
    print(abstracts)

    concept_text_map = dict(zip(concept_texts, abstracts))
    print(concept_text_map)
    # Hand the mapping back to the caller instead of only printing it.
    return concept_text_map
if __name__ == "__main__":
    # Read the sample concept JSON; the context manager guarantees the file
    # handle is closed even if reading raises.
    with open('nlp_test_out.txt') as input_file:
        input_string = input_file.read()
    concept_parser(input_string)