train.py

import struct
import numpy as np
from array import array as std_array
from ConvLayer import ConvLayer
from FullyConnectedLayer import FullyConnectedLayer
from PoolingLayer import PoolingLayer

def load_mnist(digits=np.arange(10)):
    """Load the MNIST images into a 3D uint8 array (height, width, num images)
    and the labels into a (1, num images) int8 array, and return both."""
    with open('train-images-idx3-ubyte', 'rb') as images:
        magic_num, size, rows, cols = struct.unpack('>IIII', images.read(16))
        images_arr = std_array("B", images.read())
    print("read images")
    with open('train-labels-idx1-ubyte', 'rb') as labels:
        magic_num, size = struct.unpack('>II', labels.read(8))
        labels_arr = std_array("b", labels.read())
    print("read labels")
    # keep only the examples whose label is in the requested digit set
    ind = [k for k in range(size) if labels_arr[k] in digits]
    N = len(ind)
    train_dataset_imgs = np.zeros((rows, cols, N), dtype=np.uint8)
    train_dataset_labels = np.zeros((1, N), dtype=np.int8)
    for i in range(N):
        train_dataset_imgs[:, :, i] = np.array(
            images_arr[ind[i]*rows*cols:(ind[i]+1)*rows*cols]).reshape((rows, cols))
        train_dataset_labels[0, i] = labels_arr[ind[i]]
    return train_dataset_imgs, train_dataset_labels
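
# Example usage (a sketch; assumes the idx files named above sit in the
# working directory):
#   imgs, lbls = load_mnist(digits=np.array([0, 1]))
#   imgs.shape -> (28, 28, N)   one uint8 image per slice along the last axis
#   lbls.shape -> (1, N)        the matching digit labels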

def J(output, label):
    """Our error function, cross-entropy: for a single integer class label
    this reduces to -log(output[label])."""
    sum_err = 0
    for i in range(0, 10):
        if label == i:
            sum_err -= np.log(output[i])
    return sum_err
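
# A vectorized form of J, for reference only (a sketch; not called by the
# gradient check below): with a one-hot target column t and softmax output p,
# the cross-entropy -sum(t * log(p)) reduces to -log(p[label]).
def J_vectorized(output, target):
    """Cross-entropy between a softmax output column and a one-hot target."""
    return float(-np.sum(target * np.log(output)))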

def testGradient():
    """Test the backprop implementation by checking the gradients on a small network."""
    # load the training data and scale pixel values to [0, 1]
    images, labels = load_mnist()
    images = images / 255.0
    grad_images = images[:, :, 0:10]  # 10-image subset for gradient checking
    grad_labels = labels[0, 0:10]     # the matching labels, one-hot encoded below
    # create a small network: 1 conv layer + 1 pooling layer + 1 fully connected softmax
    # convolutional layer, taking a 28x28 image through 2 9x9 filters;
    # output is 2 feature maps of size (28-9+1) x (28-9+1) = 20x20, stored as (20, 20, 2)
    layer0 = ConvLayer(grad_images[:, :, 0].reshape((28, 28, 1)), (28, 28, 1), (9, 9, 2, 1))
    print("initialized convolutional layer")
    layer0.forwardprop(grad_images[:, :, 0].reshape((28, 28, 1)))
    print("finished forward pass of convolutional layer")
    # pooling layer, taking in 2 20x20 feature maps;
    # output is 2 10x10 feature maps (though we may want to downsample 5x for the gradient check)
    layer1 = PoolingLayer(layer0.output, (20, 20, 2))
    print("initialized pooling layer")
    layer1.downsample(layer0.output, (20, 20, 2))
    print("finished forward pass of pooling layer")
    # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2)
    # or 2 4x4 feature maps (if downsampled by 5); either way, flattened into one long input vector
    full_conn_input = layer1.output.flatten()
    layer2 = FullyConnectedLayer(full_conn_input.reshape((full_conn_input.size, 1)),
                                 full_conn_input.size, 10)
    print("initialized fully-connected layer")
    layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    print("finished forward pass of fully-connected layer")
    # perform backpropagation: one-hot encode the label of image 0,
    # the example we just forward-propagated
    target = np.zeros((10, 1))
    target[int(grad_labels[0])] = 1
    layer2.backprop(0, 0, target)
    print("finished layer 2 backprop")
    layer1.upsample(layer2, 0)
    print("finished layer 1 backprop")
    layer0.backprop(layer1)
    print("finished layer 0 backprop")
    # # after initialization, finish training
    # for i in range(1, grad_labels.size):
    #     # forward propagation
    #     layer0.forwardprop(grad_images[:, :, i].reshape((28, 28, 1)))
    #     layer1.downsample(layer0.output, (20, 20, 2))
    #     full_conn_input = layer1.output.flatten()
    #     layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    #
    #     # backpropagation: one-hot encode the label of image i
    #     target = np.zeros((10, 1))
    #     target[int(grad_labels[i])] = 1
    #     layer2.backprop(0, 0, target)
    #     layer1.upsample(layer2, 0)
    #     layer0.backprop(layer1)
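    # The check below perturbs one weight at a time and estimates dJ/dw_i by
    # the central difference (J(w + eps*u_i) - J(w - eps*u_i)) / (2*eps),
    # where u_i is the i-th unit vector; this is second-order accurate in eps.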
    # check the gradient numerically
    epsilon = 1.0e-4
    # these are aliases, not copies: the check reuses the same layer objects
    # and overwrites their weights on every iteration below
    layer0_check = layer0
    layer1_check = layer1
    layer2_check = layer2
    # flatten every trainable parameter (and its backprop gradient) into one
    # long vector, keeping the same block order in both
    layer0_w_vec = layer0.W.flatten()
    layer0_bias_vec = layer0.bias.flatten()
    layer0_gradw = layer0.gradient_w.flatten()
    layer0_gradb = layer0.gradient_b.flatten()
    layer2_w_vec = layer2.W.flatten()
    layer2_bias_vec = layer2.bias.flatten()
    layer2_gradw = layer2.gradient_w.flatten()
    layer2_gradb = layer2.gradient_b.flatten()
    w_vec = np.concatenate((layer0_w_vec, layer0_bias_vec, layer2_w_vec, layer2_bias_vec))
    backprop_vec = np.concatenate((layer0_gradw, layer0_gradb, layer2_gradw, layer2_gradb))
    print(layer0_gradw)
    # sizes of the parameter blocks inside the flattened vector
    n0w = layer0_w_vec.size
    n0b = layer0_bias_vec.size
    n2w = layer2_w_vec.size
    gradient_check = np.zeros(w_vec.size)
    for i in range(0, w_vec.size):
        # perturb one parameter at a time; copy w_vec so the +epsilon and
        # -epsilon perturbations do not land on (and cancel in) the same array
        pos = w_vec.copy()
        pos[i] += epsilon
        neg = w_vec.copy()
        neg[i] -= epsilon
        # feed-forward to get J(w+e) and J(w-e), then take their central difference
        # J(w+e)
        layer0_check.W = pos[0:n0w].reshape(layer0.filter_shape)
        layer0_check.bias = pos[n0w : n0w+n0b].reshape(layer0.bias_shape)
        layer2_check.W = pos[n0w+n0b : n0w+n0b+n2w].reshape(layer2.W.shape)
        layer2_check.bias = pos[n0w+n0b+n2w:].reshape(layer2.bias.shape)
        layer0_check.forwardprop(grad_images[:, :, 0].reshape((28, 28, 1)))
        layer1_check.downsample(layer0_check.output, (20, 20, 2))
        full_conn_input = layer1_check.output.flatten()
        layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
        pos_out = J(layer2_check.output, grad_labels[0])
        # J(w-e)
        layer0_check.W = neg[0:n0w].reshape(layer0.filter_shape)
        layer0_check.bias = neg[n0w : n0w+n0b].reshape(layer0.bias_shape)
        layer2_check.W = neg[n0w+n0b : n0w+n0b+n2w].reshape(layer2.W.shape)
        layer2_check.bias = neg[n0w+n0b+n2w:].reshape(layer2.bias.shape)
        layer0_check.forwardprop(grad_images[:, :, 0].reshape((28, 28, 1)))
        layer1_check.downsample(layer0_check.output, (20, 20, 2))
        full_conn_input = layer1_check.output.flatten()
        layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
        neg_out = J(layer2_check.output, grad_labels[0])
        # central-difference estimate of dJ/dw_i
        gradient_check[i] = (pos_out - neg_out) / (2 * epsilon)
    # print gradient_check
    print(gradient_check[0:layer0_w_vec.size])
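    # Overall comparison (a sketch; both vectors use the same parameter
    # ordering as w_vec): a small relative error, around 1e-7 or less,
    # usually indicates a correct backprop implementation.
    denom = np.linalg.norm(backprop_vec) + np.linalg.norm(gradient_check)
    print("relative error:", np.linalg.norm(backprop_vec - gradient_check) / max(denom, 1e-12))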

if __name__ == '__main__':
    testGradient()