forked from hj-n/umato_exp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscalability.py
More file actions
221 lines (208 loc) · 7.46 KB
/
scalability.py
File metadata and controls
221 lines (208 loc) · 7.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import argparse
import os
import numpy as np
import pandas as pd
from scipy.stats import loguniform
from src.evaluation.models.dataset import save_csv
import time
import signal
import sys
sys.path.append("./src")
class Argument:
    """Lightweight namespace mimicking argparse.Namespace for one benchmark run.

    Wraps a configuration dict so its entries can be read as attributes
    (args.module, args.classname, args.paramfile, args.dataset, args.repeat).
    """

    def __init__(self, config):
        # The parameter was originally named `dict`, shadowing the builtin;
        # call sites pass it positionally, so the rename is safe.
        self.module = config["module"]        # dotted module path to import
        self.classname = config["classname"]  # DR class name inside the module
        self.paramfile = config["paramfile"]  # dict of hyperparameters, file name, or None
        self.dataset = config["dataset"]      # list of dataset names, or None for all
        self.repeat = config["repeat"]        # number of timed repetitions
# SIGALRM handler used to abort a run that exceeds the time budget.
def handler(signum, frame):
    """Raise on SIGALRM so a long-running fit_transform can be aborted.

    Registered via signal.signal(signal.SIGALRM, handler) in the benchmark
    loop; the loop catches the exception and records the run as timed out.

    Raises:
        TimeoutError: always. More precise than the original bare Exception,
            and still caught by the loop's broad except clause.
    """
    raise TimeoutError("time over")
# import given algorithm class dynamically
# input: module name, class name, hyperparameters file name (optional)
# parser = argparse.ArgumentParser(description="DR algorithm benchmark")
# parser.add_argument('-m', "--module", type=str, help="a module name including target class", required=True)
# parser.add_argument('-c', "--classname", type=str, help="a class name that activate DR algorithm", required=True)
# parser.add_argument('-p', "--paramfile", type=str, help="a file containing hyperparameters", default=None)
# parser.add_argument('-d', "--dataset", nargs='+', help="a dataset name", default=None)
# parser.add_argument('-r', "--repeat", type=int, help="number of times to repeat", default=1)
# args = parser.parse_args()
# Number of timed repetitions per (algorithm, dataset) pair; the benchmark
# loop below actually executes repeat + 1 runs (the first acts as warm-up).
repeat_num = 5

# Benchmark configurations. Each dict mirrors the argparse interface that is
# commented out above: the module/class to import dynamically, inline
# hyperparameters ("paramfile"), the datasets to run on (None = all datasets
# found under the npy root), and the repetition count. The commented-out
# entries are experiment toggles — uncomment one to add it to the run.
alg_list = [
    # {
    #     "module": "umap.umap_",
    #     "classname": "UMAP",
    #     "paramfile": {"n_neighbors": 15},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    # {
    #     "module": "pacmap",
    #     "classname": "PaCMAP",
    #     "paramfile": {"n_neighbors": 15},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    # {
    #     "module": "densmap",
    #     "classname": "densMAP",
    #     "paramfile": {"n_neighbors": 15},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    # {
    #     "module": "trimap",
    #     "classname": "TRIMAP",
    #     "paramfile": {"n_inliers": 15},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    # {
    #     "module": "MulticoreTSNE",
    #     "classname": "MulticoreTSNE",
    #     "paramfile": {},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    # {
    #     "module": "sklearn.manifold",
    #     "classname": "Isomap",
    #     "paramfile": {"n_neighbors": 15},
    #     "dataset": None,
    #     "repeat": repeat_num,
    # },
    {
        "module": "src.umato.umato_",
        "classname": "UMATO",
        "paramfile": {"n_neighbors": 15},
        "dataset": None,
        "repeat": repeat_num,
    },
]
# Main benchmark loop: for each configured algorithm, run it on every dataset
# repeat + 1 times (run 0 is a warm-up), record average wall-clock time, and
# merge the results into scalability/scalability.csv.
for raw_config in alg_list:
    args = Argument(raw_config)

    # Import the module and fetch the DR class dynamically; the class is
    # assumed to expose a fit_transform(x) method.
    alg_module = __import__(args.module, globals(), locals(), [args.classname], 0)
    alg_class = getattr(alg_module, args.classname)

    # Per-dataset average execution time for this algorithm.
    avg_time_dict = {}

    # Resolve hyperparameters: interactive console input, an inline dict
    # (used for this experiment), or a parameter file (currently disabled).
    hp_dict = {}
    if args.paramfile is None:
        print(
            "Input parameters in [name]=<value> form. If you want to end, press Enter twice."
        )
        s = input()
        while s and s != "":
            if "=" not in s:
                print("Wrong input format. Please write in [name]=<value> form.")
                s = input()
                continue
            # Split on the first '=' only, so values may themselves contain '='
            # (the original split("=") silently dropped everything after the
            # second '=').
            name, _, value = s.partition("=")
            # Convert the value to int or float when possible, and parse
            # booleans explicitly: bool("...") never raises, so the original
            # try/except turned every non-numeric value into True.
            try:
                value = int(value)
            except ValueError:
                try:
                    value = float(value)
                except ValueError:
                    if value in ("True", "False"):
                        value = value == "True"
            hp_dict[name] = value
            s = input()
    elif isinstance(args.paramfile, dict):
        # Used for our experiment: hyperparameters supplied inline.
        hp_dict = args.paramfile
    else:
        # Reading hyperparameters from a file is currently disabled.
        pass

    # Datasets to test: every directory under the npy root unless an explicit
    # dataset list was given.
    dataset_list = []
    if args.dataset is None:
        rootdir = "umato_exp/datasets/npy"
        for rootdir, dirs, files in os.walk(rootdir):
            dataset_list = dirs
            break
    else:
        dataset_list = sorted(args.dataset)

    # Load each dataset from .npy files and time the algorithm.
    for data_idx, datadir in enumerate(dataset_list):
        print(f"Dataset: {datadir} ({(data_idx + 1)}/{len(dataset_list)})")
        print(f"Run [{args.classname}] as ")
        x = np.load(f"umato_exp/datasets/npy/{datadir}/data.npy")
        label = np.load(f"umato_exp/datasets/npy/{datadir}/label.npy")
        elapsed_time = []

        # For the experiment: UMATO's hub_num scales with dataset size.
        # NOTE(review): the original compared args.module == "umato", which
        # never matches the configured module "src.umato.umato_", so hub_num
        # was never set; match on the class name instead. Floor division keeps
        # hub_num an int (len(x) / 30 produced a float — presumably a count;
        # confirm against UMATO's signature).
        if args.classname == "UMATO":
            hp_dict["hub_num"] = len(x) // 30
            print("[UMATO] hub_num: ", hp_dict["hub_num"])

        # rep is a distinct name: the original reused `i`, clobbering the
        # outer dataset index.
        for rep in range(args.repeat + 1):
            # Abort any single run after an hour.
            signal.signal(signal.SIGALRM, handler)
            signal.alarm(3600)
            try:
                start = time.time()
                y = alg_class(**hp_dict).fit_transform(x)
                end = time.time()
                if rep > 0:
                    # rep 0 is a warm-up and is excluded from the average;
                    # the original appended it too, so the reported "average
                    # of N trials" actually covered N + 1 runs.
                    print(
                        f"[{args.classname}, {datadir}] elapsed time (repeat {rep}): {end-start}"
                    )
                    elapsed_time.append(end - start)
            except Exception:
                print(
                    f"[{args.classname}, {datadir}] elapsed time (repeat {rep}) over 1 hour"
                )
                elapsed_time.append(np.inf)
            finally:
                # Always cancel the pending alarm; the original skipped this
                # on the timeout path, leaving the alarm armed.
                signal.alarm(0)

        avg_time = sum(elapsed_time) / len(elapsed_time)
        print(
            f"[{args.classname}, {datadir}] average time of {args.repeat} trials: {avg_time}"
        )
        avg_time_dict[datadir] = avg_time

        # save train results into csv
        # path = os.path.join(os.getcwd(), "visualization", "public", "results", datadir)
        # save_csv(path, alg_name=args.classname, data=y, label=label)

    # Merge this run's averages into the scalability CSV, creating it on the
    # first run (pd.read_csv raises FileNotFoundError, an OSError subclass).
    try:
        df = pd.read_csv("scalability/scalability.csv")
    except OSError:
        df = pd.DataFrame(columns=(["name", "repeat_num"] + dataset_list))

    # Ensure every dataset used in this run has a column.
    for col in dataset_list:
        if col not in df.columns:
            df[col] = np.nan

    # Drop any previous row for this algorithm. NOTE(review): the original
    # `args.classname in df["name"]` tests the Series *index*, not its
    # values, so stale rows were never actually removed.
    if (df["name"] == args.classname).any():
        df.drop(df[df["name"] == args.classname].index, inplace=True)

    # DataFrame.append was removed in pandas 2.0; build the row as a
    # one-line frame and concat instead.
    new_row = pd.DataFrame(
        [{**avg_time_dict, "repeat_num": args.repeat, "name": args.classname}]
    )
    df = pd.concat([df, new_row], ignore_index=True)

    # Drop the artifact column left by older index-including writes, and
    # write with index=False so it is not recreated on the next read.
    if "Unnamed: 0" in df.columns:
        df.drop(["Unnamed: 0"], axis=1, inplace=True)
    df.to_csv("scalability/scalability.csv", index=False)