ad_leaderboard.py
import datetime
import os
import sys
import time

import pandas as pd
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

# Make the local src/ package importable before importing samay.
src_path = os.path.abspath("src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

from samay.utils import get_tsb_ad_datasets, load_args
from samay.model import MomentModel, LPTMModel
from samay.dataset import MomentDataset, LPTMDataset
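
# Both wrappers are evaluated zero-shot below (the fine-tuning calls are
# commented out), so the reported metrics reflect off-the-shelf performance.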
start = time.time()
NAMES = get_tsb_ad_datasets("data/TSB-AD-U")
end = time.time()
print(f"Time taken to load dataset names: {end - start:.2f} seconds")
if __name__ == "__main__":
    very_start = time.time()
    for model_name in ["LPTMModel"]:
        csv_path = f"leaderboard/AD_{model_name}.csv"
        # Create the leaderboard CSV with a header row on first run.
        if not os.path.exists(csv_path):
            df = pd.DataFrame(columns=["dataset", "size_in_MB", "eval_time", "AUC_ROC", "Precision", "Recall", "F1"])
            df.to_csv(csv_path, index=False)
        mod_start = time.time()
        mod_timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Model eval started at: {mod_timestamp}")
        for fpath, (_, fsize) in NAMES.items():
            dataset_name = os.path.splitext(os.path.basename(fpath))[0]
            print(f"Running {model_name} on {dataset_name}")
            # Skip datasets that already have a leaderboard row before paying
            # for model construction and evaluation.
            df = pd.read_csv(csv_path)
            if dataset_name in df["dataset"].values:
                print(f"Dataset {dataset_name} already exists in {csv_path}, skipping...")
                continue
            # The train split size is encoded as the third-to-last
            # underscore-separated field of the file name; it is only needed
            # by the commented-out fine-tuning split below.
            train_size = int(fpath.split(".")[0].split("_")[-3])
            if model_name == "MomentModel":
                args = load_args("config/moment_detection.json")
                model = MomentModel(**args)
                # train_set = MomentDataset(path=fpath, boundaries=[train_size, train_size, 0], task_name="detection", mode="train")
                test_set = MomentDataset(path=fpath, boundaries=[-1, -1, -1], task_name="detection", mode="test")
            elif model_name == "LPTMModel":
                args = load_args("config/lptm.json")
                model = LPTMModel(**args)
                # train_set = LPTMDataset(path=fpath, boundaries=[train_size, train_size, 0], task_name="detection", mode="train")
                test_set = LPTMDataset(path=fpath, boundaries=[-1, -1, -1], task_name="detection", mode="test")
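            # boundaries=[-1, -1, -1] is assumed to let the dataset infer the
            # train/val/test split (contrast the pinned [train_size, train_size, 0]
            # in the commented-out training split); check the samay dataset
            # docs to confirm this convention.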
            start = time.time()
            # model.finetune(dataset=train_set, task_name="detection")
            trues, preds, labels = model.evaluate(test_set, task_name="detection")
            # Pointwise squared reconstruction error serves as the anomaly score.
            anomaly_score = ((preds - trues) ** 2).flatten()
            # Three-sigma rule: flag points more than 3 standard deviations
            # above the mean score as anomalies.
            threshold = anomaly_score.mean() + 3 * anomaly_score.std()
            pred_labels = (anomaly_score > threshold).astype(int)
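            # A hypothetical, more outlier-robust alternative (not used here)
            # would threshold at a fixed score quantile rather than mean + 3*std,
            # e.g. with numpy:
            #   threshold = np.quantile(anomaly_score, 0.997)
            # where 0.997 mirrors the upper-tail mass of the 3-sigma rule under
            # a normality assumption.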
            # AUC-ROC is threshold-free (computed on the raw scores), while
            # Precision/Recall/F1 use the thresholded labels. zero_division=0
            # silences warnings when no points are predicted anomalous.
            auc_roc = roc_auc_score(labels, anomaly_score)
            precision = precision_score(labels, pred_labels, zero_division=0)
            recall = recall_score(labels, pred_labels, zero_division=0)
            f1 = f1_score(labels, pred_labels, zero_division=0)
print(f"AUC_ROC: {auc_roc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
end = time.time()
print(f"Time taken to evaluate {model_name} on {dataset_name}: {end - start:.2f} seconds")
eval_time = end - start
unit = 's'
if eval_time > 1000:
eval_time /= 60
unit = 'm'
            new_row = {
                "dataset": dataset_name,
                "size_in_MB": fsize,
                "eval_time": f"{eval_time:.2f} {unit}",
                "AUC_ROC": f"{auc_roc:.4f}",
                "Precision": f"{precision:.4f}",
                "Recall": f"{recall:.4f}",
                "F1": f"{f1:.4f}",
            }
            # Append the new row and write the CSV after every dataset so that
            # partial progress survives interruptions.
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
            df.to_csv(csv_path, index=False)
print(f"Results saved to {csv_path}")
mod_end = time.time()
print(f"Total time taken to evaluate {model_name} on all datasets: {mod_end - mod_start:.2f} seconds")
very_end = time.time()
print("All evaluations completed.")
print(f"Total time taken to evaluate all models on all datasets: {very_end - very_start:.2f} seconds")