-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathranking.py
More file actions
46 lines (35 loc) · 1.58 KB
/
ranking.py
File metadata and controls
46 lines (35 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import pipeline
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
class Ranking:
    """Rank candidate terms by how relevant they are to a theme.

    Relevance scores come from a zero-shot-classification pipeline; the
    scores are then bucketed into integer "levels" (1 = first bucket).
    """

    def __init__(self):
        """Load the zero-shot classifier backed by ``roberta-large-mnli``."""
        self.classifier = pipeline(
            'zero-shot-classification', model='roberta-large-mnli'
        )

    def get_ranks(self, theme, terms):
        """Return a ``{"term", "level"}`` dict for each term.

        Args:
            theme: Subject inserted into the sequence that is classified.
            terms: Candidate labels to rank against the theme.

        Returns:
            An empty list when ``terms`` is empty, a single level-1 entry
            when there is exactly one term (the classifier is not called),
            otherwise the grouped classifier output.
        """
        term_count = len(terms)
        if term_count <= 0:
            return []
        if term_count == 1:
            # Nothing to compare against, so skip the model call entirely.
            return [{"term": terms[0], "level": 1}]

        sequence_to_classify = f"to learn about {theme}"
        hypothesis_template = "learning about {} is important"
        result = self.classifier(
            sequence_to_classify,
            terms,
            hypothesis_template=hypothesis_template,
        )
        return self.format_response(result)

    def group_scores(self, scores, terms):
        """Bucket ``scores`` into levels and pair each level with its term.

        The first score opens level 1 and becomes that level's leader.
        Each following score stays in the current level while it is no more
        than one standard deviation (of all scores) below the leader;
        otherwise it opens the next level and becomes the new leader.

        NOTE(review): this walk presumably expects ``scores`` in descending
        order (as the zero-shot pipeline is documented to return them) --
        confirm for any other caller.

        Args:
            scores: Sequence of numeric scores.
            terms: Sequence of terms, positionally aligned with ``scores``.

        Returns:
            A list of ``{"term": ..., "level": ...}`` dicts, one per score.
            An empty ``scores`` sequence yields an empty list.
        """
        # Guard: the original indexed scores[0] unconditionally and raised
        # IndexError on empty input.
        if len(scores) == 0:
            return []

        spread = np.std(scores)
        leader = scores[0]
        level = 1
        levels = [level]
        for score in scores[1:]:
            within_band = leader - score - spread <= 0
            if not within_band:
                # Gap to the leader exceeds the spread: start a new level.
                leader = score
                level += 1
            levels.append(level)

        return [
            {"term": terms[i], "level": lvl} for i, lvl in enumerate(levels)
        ]

    def format_response(self, response):
        """Convert a classifier response into the ranked-term list.

        Reads the ``"scores"`` and ``"labels"`` entries of ``response`` and
        passes them to :meth:`group_scores`.
        """
        return self.group_scores(response["scores"], response["labels"])