-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepare_data.py
More file actions
86 lines (68 loc) · 3.39 KB
/
prepare_data.py
File metadata and controls
86 lines (68 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from pylab import *
import csv
# Number of input data sets (one data file and one truth file each) and number
# of cross-validation folds built from them.
n_sets = 8
n_folds = 4

# Per-set containers, filled in when the CSV files are read.
# `range` is used rather than `xrange` so the script runs unchanged on both
# Python 2 and Python 3; each comprehension creates independent inner lists.
x = [[] for _ in range(n_sets)]
y = [[] for _ in range(n_sets)]

# Per-fold containers for the train / validation / test splits.
x_train = [[] for _ in range(n_folds)]
x_val = [[] for _ in range(n_folds)]
x_test = [[] for _ in range(n_folds)]
y_train = [[] for _ in range(n_folds)]
y_val = [[] for _ in range(n_folds)]
y_test = [[] for _ in range(n_folds)]
# Naming convention of the files: <prefix><set number><extension>, where the
# set number starts at 1.
file_str_data = "backup/csv_training_cells_"
file_type = ".csv"
file_str_truth = "backup/csv_truth_cells_"
# Reading the data files. Each file is opened in a `with` block so its handle
# is closed as soon as its rows have been read, rather than being left open
# for the rest of the run.
for i in range(0, n_sets):
    file_name_data = file_str_data + str(i+1) + file_type
    file_name_truth = file_str_truth + str(i+1) + file_type
    # csv.reader yields rows of string cells; they are stored as numpy arrays.
    with open(file_name_data) as data_file:
        x[i] = np.array(list(csv.reader(data_file)))
    with open(file_name_truth) as truth_file:
        y[i] = np.array(list(csv.reader(truth_file)))
# Onehotencoding formatting: for every set, build an integer array whose left
# column block flags cells equal to '1' and whose right column block flags
# cells equal to '2'. Cells holding any other value are 0 in both blocks.
# NOTE(review): presumably each truth file has a single column, giving a
# two-column result per row -- confirm against the data.
y_OneHotEncoded = [[] for _ in range(n_sets)]
for i in range(0, n_sets):
    # The truth cells are strings, so compare against the string labels.
    a = np.array(y[i] == '1').astype(int)
    b = np.array(y[i] == '2').astype(int)
    # No pre-allocation is needed: concatenate builds the result array.
    y_OneHotEncoded[i] = np.concatenate((a, b), axis=1)
# Every fold holds out two of the sets for testing and trains on the other
# six, concatenated row-wise in ascending set order. Sets are written L1..L8
# in the listing below and are indexed 0..7 in the code.
#   1: Training: L2, L3, L5, L6, L7, L8   Test: L1, L4
#   2: Training: L1, L3, L4, L5, L6, L8   Test: L2, L7
#   3: Training: L1, L2, L3, L4, L5, L7   Test: L6, L8
#   4: Training: L1, L2, L4, L6, L7, L8   Test: L3, L5
held_out_sets = [(0, 3), (1, 6), (5, 7), (2, 4)]
for fold, held_out in enumerate(held_out_sets):
    # Training sets are all sets that are not held out for this fold.
    kept = [s for s in range(n_sets) if s not in held_out]
    x_train[fold] = np.concatenate([x[s] for s in kept], axis=0)
    x_test[fold] = np.concatenate([x[s] for s in held_out], axis=0)
    y_train[fold] = np.concatenate([y_OneHotEncoded[s] for s in kept], axis=0)
    y_test[fold] = np.concatenate([y_OneHotEncoded[s] for s in held_out], axis=0)
path = "data/"
# Write each fold's four arrays to separate comma-delimited files under
# `path`, named after the array and the fold index.
for i in range(0, n_folds):
    fold_outputs = (
        ("x_train[", x_train[i]),
        ("y_train[", y_train[i]),
        ("x_test[", x_test[i]),
        ("y_test[", y_test[i]),
    )
    for name_prefix, rows in fold_outputs:
        with open(path + name_prefix + str(i) + "].csv", "w+") as my_csv:
            csvWriter = csv.writer(my_csv, delimiter=',')
            csvWriter.writerows(rows)