-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathdatasets.py
More file actions
65 lines (54 loc) · 2.9 KB
/
datasets.py
File metadata and controls
65 lines (54 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from keras.datasets import mnist, cifar10
import numpy as np
from keras.utils import to_categorical
def inbalanced_mnist(inbalance_size):
(X_train, y_train), (X_test, y_test) = mnist.load_data()
filters = np.zeros(X_train.shape[0], dtype=np.bool)
for i in range(10):
current_filters = y_train == i
if i % 2 == 0:
current_filters = np.logical_and(current_filters, np.cumsum(current_filters)<=inbalance_size*6)
filters = np.logical_or(filters, current_filters)
X_train, y_train = X_train[filters], y_train[filters]
filters = np.zeros(X_test.shape[0], dtype=np.bool)
for i in range(10):
current_filters = y_test == i
if i % 2 == 0:
current_filters = np.logical_and(current_filters, np.cumsum(current_filters)<=inbalance_size)
filters = np.logical_or(filters, current_filters)
X_test, y_test = X_test[filters], y_test[filters]
X_train, X_test = np.expand_dims(X_train / 255.0, axis=-1), np.expand_dims(X_test / 255.0, axis=-1)
y_train, y_test = to_categorical(y_train), to_categorical(y_test)
# dummy for regularization term
dummy_train, dummy_test = np.zeros((X_train.shape[0], 1), dtype=np.float32), np.zeros((X_test.shape[0], 1), dtype=np.float32)
y_train = np.concatenate([y_train, dummy_train], axis=-1)
y_test = np.concatenate([y_test, dummy_test], axis=-1)
return (X_train, y_train), (X_test, y_test)
def inbalanced_cifar(inbalance_size):
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
filters = np.zeros(X_train.shape[0], dtype=np.bool)
for i in range(10):
current_filters = y_train[:,0] == i
if i % 2 == 0:
current_filters = np.logical_and(current_filters, np.cumsum(current_filters)<=inbalance_size*5)
filters = np.logical_or(filters, current_filters)
X_train, y_train = X_train[filters], y_train[filters]
filters = np.zeros(X_test.shape[0], dtype=np.bool)
for i in range(10):
current_filters = y_test[:,0] == i
if i % 2 == 0:
current_filters = np.logical_and(current_filters, np.cumsum(current_filters)<=inbalance_size)
filters = np.logical_or(filters, current_filters)
X_test, y_test = X_test[filters], y_test[filters]
# ensure batch size is divisible by 8
n_train = X_train.shape[0] // 8 * 8
n_test = X_test.shape[0] // 8 * 8
X_train, y_train = X_train[:n_train], y_train[:n_train]
X_test, y_test = X_test[:n_test], y_test[:n_test]
X_train, X_test = X_train / 255.0, X_test / 255.0
y_train, y_test = to_categorical(y_train), to_categorical(y_test)
# dummy for regularization term
dummy_train, dummy_test = np.zeros((X_train.shape[0], 1), dtype=np.float32), np.zeros((X_test.shape[0], 1), dtype=np.float32)
y_train = np.concatenate([y_train, dummy_train], axis=-1)
y_test = np.concatenate([y_test, dummy_test], axis=-1)
return (X_train, y_train), (X_test, y_test)