-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSave_Data_For_Training.py
More file actions
45 lines (27 loc) · 1.36 KB
/
Save_Data_For_Training.py
File metadata and controls
45 lines (27 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from pathlib import Path

import pandas as pd
from sentence_transformers import SentenceTransformer, util
from sympy.printing.pytorch import torch
def assign_labels_to_sentences(headlines, sentences, threshold=0.3,
                               model_name='distiluse-base-multilingual-cased-v2'):
    """Label each sentence with its most similar headline.

    Headlines and sentences are embedded with a SentenceTransformer model and
    compared by cosine similarity. A sentence is paired with the headline that
    has the highest similarity, provided that score is strictly greater than
    ``threshold``; otherwise it is paired with ``None``. Every accepted match
    is printed to stdout.

    Args:
        headlines: Indexable sequence of candidate headline strings.
        sentences: Sequence of sentence strings to label.
        threshold: Exclusive lower bound on the cosine similarity required
            for a headline to be assigned.
        model_name: Name of the SentenceTransformer model to load.

    Returns:
        A list of ``(sentence, headline_or_None)`` tuples, one per sentence,
        in input order.
    """
    # len() instead of truthiness so array-like inputs are handled as well.
    if len(sentences) == 0:
        return []
    if len(headlines) == 0:
        # Nothing to match against; argmax over an empty similarity row would
        # raise, so label everything as unmatched without loading the model.
        return [(sentence, None) for sentence in sentences]

    model = SentenceTransformer(model_name)
    model = model.to(torch.device("cpu"))  # Run on CPU (for Raspberry Pi 5)

    headlines_embedding = model.encode(headlines, convert_to_tensor=True)
    sentences_embedding = model.encode(sentences, convert_to_tensor=True)

    # Row i holds the similarity of sentence i to every headline.
    similarity_matrix = util.cos_sim(sentences_embedding, headlines_embedding)

    labeled_data = []
    for i, sentence in enumerate(sentences):
        best_match_idx = torch.argmax(similarity_matrix[i]).item()
        best_score = similarity_matrix[i][best_match_idx].item()

        if best_score > threshold:
            print(f"Beste Überschrift: {headlines[best_match_idx]}, Text: {sentence}, Score: {best_score:.3f}")
            labeled_data.append((sentence, headlines[best_match_idx]))
        else:
            labeled_data.append((sentence, None))

    return labeled_data
def save_texts_with_labels_to_csv(answer_and_label_as_dataframe,
                                  csv_path='training/training_elias.csv'):
    """Write (text, label) rows to a CSV file with columns "Text" and "Label".

    Args:
        answer_and_label_as_dataframe: Row data accepted by ``pd.DataFrame``
            with exactly two columns, e.g. a list of ``(text, label)`` tuples.
        csv_path: Destination file. Defaults to the previously fixed location
            ``training/training_elias.csv``.

    Returns:
        True if the file was written; False if building the table or saving
        it failed (the error is printed, not raised).
    """
    try:
        df = pd.DataFrame(answer_and_label_as_dataframe, columns=["Text", "Label"])
        # to_csv does not create directories, so make sure the target exists.
        Path(csv_path).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(csv_path, index=False)
    except Exception as e:
        # Deliberately broad: failure is reported through the return value.
        print(f"Error while saving CSV: {e}")
        return False
    return True