-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassifier.py
More file actions
168 lines (131 loc) · 6.28 KB
/
classifier.py
File metadata and controls
168 lines (131 loc) · 6.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import time
# Load the whole training CSV into a plain ndarray (one row per example).
data = np.array(pd.read_csv('train.csv'))
m, n = data.shape
# Shuffle the rows in place so the dev/train split below is random.
np.random.shuffle(data)

# Dev split: the first 5000 shuffled rows, transposed so each column is one
# example. Row 0 is taken as the label and the remaining rows as the inputs.
data_dev = data[:5000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:] / 255.  # scale inputs by 1/255 (presumably 0-255 pixel values)

# Training split: every remaining row, laid out the same way as the dev split.
data_train = data[5000:].T
Y_train = data_train[0]
X_train = data_train[1:] / 255.
_, m_train = X_train.shape  # m_train = number of training examples (columns)
'''Network layout: two layers on top of the input.
Input layer a0: 784 units, one for each of the 784 pixels.
Hidden layer a1: 10 units with ReLU activation.
Output layer a2: 10 units corresponding to the digits 0-9, with softmax activation.
Forward prop:
Z1 = W1 X + b1
A1 = ReLU(Z1)
Z2 = W2 A1 + b2
A2 = softmax(Z2)
Backward prop:
dZ2 = A2 - Y    (Y one-hot encoded)
dW2 = 1/m dZ2 A1(transposed)
db2 = 1/m summation(dZ2)
dZ1 = W2(transposed) dZ2 * g1'(Z1)    NOTE: g1' is the derivative of the hidden-layer activation
dW1 = 1/m dZ1 A0(transposed)    NOTE: all the 'd' variables stand for error terms; dZ2 is basically model predicted values - label values
db1 = 1/m summation(dZ1)
Parameter updates:
W2 = W2 - alpha dW2
b2 = b2 - alpha db2
W1 = W1 - alpha dW1    (alpha being the learning rate)
b1 = b1 - alpha db1
TODO: write the variable definitions later.
'''
#TRAINING
def init_params():
    """Create random starting weights and biases for the two layers.

    Every entry is drawn uniformly from [0, 1) and shifted by -0.5, so the
    values are centred on zero.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes (10, 784), (10, 1), (10, 10)
        and (10, 1).
    """
    # Draw order (W1, b1, W2, b2) is kept so a seeded run gives the same values.
    hidden_weights = np.random.rand(10, 784) - 0.5
    hidden_bias = np.random.rand(10, 1) - 0.5
    output_weights = np.random.rand(10, 10) - 0.5
    output_bias = np.random.rand(10, 1) - 0.5
    return hidden_weights, hidden_bias, output_weights, output_bias
# Defining the activation functions: ReLU for the hidden layer, and softmax to scale the output values into the 0-1 floating point range
def ReLU(Z):
    """Rectified linear unit: element-wise maximum of ``Z`` and 0.

    Positive entries pass through unchanged; negative entries become 0.
    """
    rectified = np.maximum(Z, 0)
    return rectified
def softmax(Z):
    """Column-wise softmax: turn each column of ``Z`` into probabilities.

    Args:
        Z: array of scores; the softmax is taken along axis 0, so for a 2-D
            input every column is normalised independently.

    Returns:
        Array of the same shape as ``Z`` whose entries along axis 0 are
        non-negative and sum to 1.
    """
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio from
    # becoming NaN) when the scores are large.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    # keepdims makes the normaliser broadcast against every row explicitly.
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass through both layers.

    Args:
        W1, b1: hidden-layer weights and bias.
        W2, b2: output-layer weights and bias.
        X: input matrix; presumably one example per column (784 x m) -- TODO confirm.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the pre-activation and activation of the
        hidden layer followed by those of the output layer.
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU(hidden_pre)
    # Output layer: affine transform followed by softmax.
    output_pre = np.dot(W2, hidden_act) + b2
    output_prob = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_prob
def ReLU_deriv(Z):
    """Derivative of ReLU: a boolean mask that is True where ``Z`` > 0.

    In arithmetic the mask behaves as 1 (slope of the linear part) where it
    is True and 0 elsewhere, which is how the caller uses it.
    """
    positive_mask = Z > 0
    return positive_mask
def one_hot(Y, num_classes=None):
    """One-hot encode integer labels, one column per example.

    Args:
        Y: 1-D integer array of class labels.
        num_classes: number of rows (classes) in the result. When omitted it
            is inferred as ``Y.max() + 1``. Pass it explicitly when the batch
            may not contain the highest label, otherwise the result has fewer
            rows than the network has output units.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` with a single 1 in each
        column, at the row given by that example's label.

    Raises:
        ValueError: if ``Y`` is empty and ``num_classes`` is not given.
        IndexError: if a label is not smaller than ``num_classes``.
    """
    if num_classes is None:
        num_classes = Y.max() + 1
    # Build the (classes, examples) layout directly instead of transposing.
    encoded = np.zeros((num_classes, Y.size))
    encoded[Y, np.arange(Y.size)] = 1
    return encoded
def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the gradients of both layers for one batch.

    Args:
        Z1, A1: hidden-layer pre-activation and activation from forward_prop.
        Z2, A2: output-layer pre-activation and softmax output (Z2 is unused).
        W1, W2: current weights (only W2 is needed here).
        X: the inputs the forward pass was run on.
        Y: integer labels for the same examples.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``. The weight gradients have the shapes
        of W1 and W2; each bias gradient is a column with one entry per unit.
    """
    # Average over the examples actually in this batch. The module-level `m`
    # counts every row of the CSV (dev split included), which mis-scales the
    # gradients when training on the training split only.
    n_samples = Y.size
    one_hot_Y = one_hot(Y)

    # Output layer: softmax output minus the one-hot target.
    dZ2 = A2 - one_hot_Y
    dW2 = dZ2.dot(A1.T) / n_samples
    # Sum over examples only (axis=1) so every output unit keeps its own bias
    # gradient instead of all units sharing one scalar.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / n_samples

    # Hidden layer: push the error back through W2 and mask by the ReLU slope.
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = dZ1.dot(X.T) / n_samples
    db1 = np.sum(dZ1, axis=1, keepdims=True) / n_samples
    return dW1, db1, dW2, db2
def param_update(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step on every parameter.

    Each parameter is replaced by ``param - alpha * gradient``; the inputs
    are not modified.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated values.
    """
    params = (W1, b1, W2, b2)
    grads = (dW1, db1, dW2, db2)
    return tuple(param - alpha * grad for param, grad in zip(params, grads))
def get_pred(A2):
    """Return, for every column of ``A2``, the row index of its largest entry.

    With the softmax output as input this is the predicted class per example.
    """
    predicted = np.argmax(A2, axis=0)
    return predicted
def get_acc(predictions, Y):
    """Return the fraction of ``predictions`` that equal the labels ``Y``.

    Also prints both arrays so they can be compared by eye.
    """
    print(predictions, Y)
    n_correct = np.sum(predictions == Y)
    return n_correct / Y.size
# The logic for get_pred and get_acc was copy-pasted and changed a bit
def gradient_descent(X, Y, alpha, iterations):
    """Train the network by repeated forward pass, backward pass and update.

    Args:
        X: training inputs.
        Y: training labels.
        alpha: learning rate handed to param_update.
        iterations: number of update steps to run.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the trained parameters.
    """
    W1, b1, W2, b2 = init_params()
    for step in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        grads = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = param_update(W1, b1, W2, b2, *grads, alpha)
        # Report only every 10th step to keep the terminal readable.
        if step % 10 != 0:
            continue
        print("Iteration: ", step)
        # A2 comes from the forward pass above, i.e. before this step's update.
        print(get_acc(get_pred(A2), Y))
    return W1, b1, W2, b2
# Train on the training split loaded from the CSV: learning rate 0.10, 1000 iterations.
# Author's note: accuracy was about 48 percent before an issue with ReLU was fixed, 80+ after.
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, alpha=0.10, iterations=1000)
# prediction on requested digit
def make_predictions(X, W1, b1, W2, b2):
    """Predict a class for every column of ``X`` with the given parameters.

    Runs a single forward pass and returns get_pred of the output-layer
    activation.
    """
    output_probs = forward_prop(W1, b1, W2, b2, X)[-1]
    return get_pred(output_probs)
def test_prediction(index, W1, b1, W2, b2):
    """Print prediction and label for one training example and plot its image.

    Args:
        index: column of the module-level ``X_train`` to inspect.
        W1, b1, W2, b2: trained parameters used for the prediction.
    """
    # Indexing with None keeps the example as a single column rather than 1-D.
    column = X_train[:, index, None]
    prediction = make_predictions(column, W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    # Undo the 1/255 scaling and restore the 28x28 layout for display.
    pixels = column.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(pixels, interpolation='nearest')
    plt.show()
    time.sleep(3)
# Author's note: the plot does not close automatically because of some issue.
# As an alternative, comment out the loop below and call
# test_prediction(int(input('enter some index(0-37000)')), W1, b1, W2, b2)
# by hand for each test case.
ch = 0
while ch == 0:
    # The index addresses a column of X_train; the first 5000 shuffled rows
    # were held out as the dev set and are not part of it.
    test_prediction(int(input('enter some index(0-37000)')), W1, b1, W2, b2)
    plt.close('all')
    # input() accepts only the prompt. Answering 'n' stops the loop; any other
    # answer leaves ch at 0 so another case can be tested.
    if input('do you want to test more cases(y/n) ') == 'n':
        ch = 1
    else:
        print('')
# test_prediction(8,W1,b1,W2,b2)
# test_prediction(5,W1,b1,W2,b2)
# test_prediction(1,W1,b1,W2,b2)
# test_prediction(3,W1,b1,W2,b2)
# test_prediction(2,W1,b1,W2,b2)
# test_prediction(9,W1,b1,W2,b2)
# test_prediction(0,W1,b1,W2,b2)
# test_prediction(7,W1,b1,W2,b2)
# Accuracy on the dev split, which was not passed to gradient_descent.
dev_predictions = make_predictions(X_dev, W1, b1, W2, b2)
dev_accuracy = get_acc(dev_predictions, Y_dev)
print(dev_accuracy)