# train_classifiers.py
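"""Train a set of time-series classifiers on every dataset in a folder.

For each dataset, each requested classifier, and each feature selection
(or all features when no selection directory is given), the model is
trained three times; accuracy, memory usage, and training time are
recorded, the best of the three models is saved to disk, and the
aggregated statistics are stored in a .npy results file.
"""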
import argparse
import os
from copy import deepcopy

import numpy as np

# from utils.trainers_aeon import train
from utils.trainers import train
from utils.load_datasets import load_datasets
# TODO: should some of these helpers be moved to other files?
from utils.helpers import (get_computed_AI_selections, extract_features,
                           elapsed_time, save_model, clean_memory, str2bool)

def main(args):
    # extract and process the command-line arguments
    base_path = args.dataset_dir
    saved_models_dir = args.saved_models_path
    results_file = args.result_file
    selection_dir = args.selection_dir
    channel_selection = args.channel_selection

    # check that every requested classifier name is in the allowed set
    model_names = args.classifiers
    allowed_models = ["HC2", "drCIF", "MRH", "ConvTran", "hydra", "inceptionTime"]
    if not all(m in allowed_models for m in model_names):
        raise ValueError("invalid classifier name(s)")

    # data structure where the results will be stored
    results = {}
    for f in sorted(os.listdir(base_path)):
        # load the dataset stored in this sub-folder
        dataset_dir = os.path.join(base_path, f)
        original_data = load_datasets(dataset_dir, f)
        current_dataset_name = original_data['name']
        print("current loaded dataset is...", current_dataset_name)

        if selection_dir:
            # a selection directory was provided: load every .npz archive belonging
            # to this dataset and extract the selections and their corresponding names
            selection_file_names = [s for s in os.listdir(selection_dir)
                                    if current_dataset_name in s and s.endswith(".npz")]
            selected_features_files = [np.load(os.path.join(selection_dir, s), allow_pickle=True)['results'].item()
                                       for s in selection_file_names]
            # build the selection dictionary, one entry per computed saliency map
            selection_dict = {}
            for s in selected_features_files:
                selection_dict = get_computed_AI_selections(
                    saliency_map_dict=s, channel_sel=channel_selection,
                    selection_dict=selection_dict, info="")
        else:
            # no selection provided: train on all features
            selection_dict = {'allFeatures': {'allFeatures': None}}
        results[current_dataset_name] = {}
        for model_name in model_names:
            results[current_dataset_name][model_name] = {}
            for selection_name, selected_f in selection_dict.items():
                # restrict the original data to the selected features when needed
                data = original_data if selection_name == "allFeatures" else \
                    extract_features(deepcopy(original_data), selected_f, channel_selection)
                print("current evaluated selection is", selection_name, "of dataset", current_dataset_name)

                # statistics collected over the repeated trainings
                best_accuracy = -1
                story = {
                    'accuracy': [],
                    'average_memory_GB': [],
                    'peak_memory_GB': [],
                    'training_time': [],
                }

                # train each configuration three times to smooth out run-to-run variance
                for i in range(3):
                    print("training", i + 1, "-th model ...")
                    # TODO use **kwargs to say which value is which param?
                    model, current_accuracy, mem_used, training_time = elapsed_time(
                        train, {'dataset': data, 'model_name': model_name, 'return_train_predictions': False}
                    )
                    story['accuracy'].append(current_accuracy)
                    story['average_memory_GB'].append(mem_used['average_memory_GB'])
                    story['peak_memory_GB'].append(mem_used['peak_memory_GB'])
                    story['training_time'].append(training_time)
                    print(i + 1, ")", model_name, "training over! Accuracy is:", current_accuracy,
                          "\tTraining time:", training_time)

                    # keep track of the best of the three runs
                    if current_accuracy > best_accuracy:
                        best_accuracy = current_accuracy
                        # save the current best model
                        file_name = "_".join((current_dataset_name, model_name, selection_name))
                        save_model(file_name, model, model_name, saved_models_dir)

                    # delete the model and run the garbage collector for memory-tracking purposes
                    clean_memory(model)
                # add the aggregated results of the three runs to the results structure
                results[current_dataset_name][model_name][selection_name] = {
                    'accuracy': np.mean(story['accuracy']),
                    'average_memory_GB': np.mean(story['average_memory_GB']),
                    'peak_memory_GB': np.max(story['peak_memory_GB']),
                    'training_time': np.mean(story['training_time']),
                    'story': story,
                }
                # persist the (partial) results after every configuration
                np.save(results_file, results)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("dataset_dir", type=str, help="folder where the datasets are stored")
    parser.add_argument("saved_models_path", type=str, help="folder where to save the models")
    parser.add_argument("result_file", type=str, help=".npy file where to store the results")
    parser.add_argument("--classifiers", nargs='+', help="classifier names")
    parser.add_argument("--selection_dir", type=str, default=None,
                        help="feature selection(s) to be used. If not provided, all features are used.")
    parser.add_argument("--channel_selection", type=str2bool, default=False,
                        help="whether to perform channel selection (True) or time point selection (False)")
    args = parser.parse_args()
    main(args)
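
# Example invocation (the paths and classifier names below are illustrative,
# not taken from the repository):
#   python train_classifiers.py ./datasets ./saved_models results.npy \
#       --classifiers HC2 hydra --selection_dir ./selections --channel_selection True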