-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathheatmap.py
More file actions
54 lines (46 loc) · 1.67 KB
/
heatmap.py
File metadata and controls
54 lines (46 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sentence_transformers import SentenceTransformer
import seaborn as sns
import matplotlib.pyplot as plt
# Load the dataset and pull the "text" and "label" columns out as plain lists.
df = pd.read_csv("dataset.csv")
texts, labels = (df[column].tolist() for column in ("text", "label"))

# Embeddings: encode every text with the pretrained sentence-transformer.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(texts)
# Coordinate importance: encode the class labels as integers, fit a random
# forest on the embeddings, and rank the embedding coordinates by the
# forest's feature importances.
encoder = LabelEncoder()
y = encoder.fit_transform(labels)

# A fixed seed keeps the importance ranking reproducible. An unseeded forest
# can pick a different set of "top" coordinates on every run, which would
# also change which coordinates end up as heatmap columns.
clf = RandomForestClassifier(random_state=42)
clf.fit(embeddings, y)
importance = clf.feature_importances_

top_n = 10
# Indices of the top_n most important coordinates, most important first.
top_dims = np.argsort(importance)[-top_n:][::-1]
# Name the three highest-ranked axes; every remaining selected axis is
# labeled by its embedding index ("dim_<index>").
# NOTE(review): the semantic names are attached purely by importance rank;
# nothing here verifies that a coordinate actually encodes that meaning.
semantic_names = ("эмоциональность", "вопросительность", "научность")

# Iterating over top_dims itself (instead of indexing it up to top_n) keeps
# this working when fewer coordinates were selected than expected.
named_dims = {
    dim: semantic_names[rank] if rank < len(semantic_names) else f"dim_{dim}"
    for rank, dim in enumerate(top_dims)
}
# Build the table: one row per embedding, one column per selected coordinate.
# Integer-array indexing selects all chosen columns in a single NumPy
# operation instead of a nested Python loop over rows and coordinates.
heatmap_data = np.asarray(embeddings)[:, top_dims]
heatmap_df = pd.DataFrame(heatmap_data, columns=[named_dims[d] for d in top_dims])

# Keep the source label and text next to each row of coordinate values.
heatmap_df["label"] = labels
heatmap_df["text"] = texts
# Visualization: plot only the coordinate columns (the "label" and "text"
# helper columns are dropped first) as a heatmap and show the figure.
coordinate_values = heatmap_df.drop(columns=["label", "text"])

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(coordinate_values, cmap="coolwarm", linewidths=0.5, ax=ax)
ax.set_title("Heatmap по смысловым координатам эмбеддингов")
ax.set_xlabel("Координата")
ax.set_ylabel("Фраза")
fig.tight_layout()
plt.show()