-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextRankAnalysis.py
More file actions
59 lines (40 loc) · 1.51 KB
/
TextRankAnalysis.py
File metadata and controls
59 lines (40 loc) · 1.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
'''
This script, 'TextRankAnalysis.py', reads the output CSV generated by the TextRank algorithm to analyze and visualize the results.
Note that this script analyzes TextRankOutputFinished.csv, as opposed to TextRankOutput.csv, which is generated by TextRank.py.
The difference in output names is to avoid accidentally overwriting the output generated by TextRank.py.
@author Nisanur Genc
@author Alex Wills
'''
import matplotlib.pyplot as plt
import os
import csv
import DatasetAnalysis as analyze
def main():
    """Summarize and plot the scores stored in the TextRank output CSV.

    Reads 'TextRankOutputFinished.csv' from the directory containing this
    script and skips its header row. For every remaining non-empty row, the
    first column is looked up in the dictionary returned by
    ``analyze.build_length_dict()`` to get the document length, and the last
    column is taken as the score. The average, minimum and maximum scores are
    printed, and a scatter plot of score against document length is shown.

    If the file contains no data rows, a message is printed and no statistics
    or plot are produced (instead of failing with a division by zero).

    Raises:
        FileNotFoundError: If the output CSV does not exist.
        KeyError: If a row's first column is not a key of the length dict.
        ValueError: If a length or score cannot be converted to a number.
    """
    # The output file to analyze
    output_file = 'TextRankOutputFinished.csv'
    rootdir = os.path.dirname(__file__)

    # NOTE(review): presumably maps a document identifier to its sentence
    # count (the x-axis label below says "# Sentences") -- confirm against
    # DatasetAnalysis.build_length_dict.
    document_lengths = analyze.build_length_dict()

    # X and Y axis lists for scatterplot
    lengths = []
    scores = []

    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files passed to csv.reader.
    csv_path = os.path.join(rootdir, output_file)
    with open(csv_path, mode='r', encoding='utf-8', newline='') as outfile:
        reader = csv.reader(outfile)
        next(reader, None)  # Skip the header row (no-op on an empty file).
        for entry in reader:
            # csv.reader yields [] for blank lines; indexing would fail.
            if not entry:
                continue
            # Convert both values before appending so the two lists can
            # never get out of step with each other.
            doc_size = int(document_lengths[entry[0]])
            score = float(entry[-1])
            lengths.append(doc_size)
            scores.append(score)

    # Without any rows the average is undefined and min()/max() would raise.
    if not scores:
        print("No scores found in", output_file)
        return

    avg_score = sum(scores) / len(scores)
    print("Average Rouge-2 Score:", avg_score)

    min_score = min(scores)
    max_score = max(scores)
    print("Minimum Score:", min_score)
    print("Maximum Score:", max_score)

    plt.scatter(lengths, scores)
    plt.xlabel("Document Size (# Sentences)")
    plt.ylabel("Rouge-2 Score")
    plt.show()
# Run the analysis only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()