# -*- coding: utf-8 -*-
"""Linear regression via gradient descent.

Created on Wed Feb 22 16:01:40 2017
@author: picku
"""
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np

style.use('ggplot')
def total_squared_error(data, m, b):
    """Mean squared error between the data points and the line y = mx + b:

    SE_line = ((y_1 - (m*x_1 + b))^2 + ... + (y_n - (m*x_n + b))^2) / n
    """
    total_SE = 0
    for data_point in data:
        SE = (data_point[1] - (m * data_point[0] + b)) ** 2
        total_SE += SE
    return total_SE / len(data)
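
# A minimal vectorized equivalent of total_squared_error, shown as a sketch;
# it assumes `data` is an (n, 2) numpy array of (x, y) rows, as loaded in
# run() below. The function name is ours, not part of the original script.
def total_squared_error_vec(data, m, b):
    """Mean squared error, computed with numpy broadcasting."""
    x, y = data[:, 0], data[:, 1]
    return np.mean((y - (m * x + b)) ** 2)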
def partial_difference_quotient(f, points, v, i, h):
    """Compute the ith partial difference quotient of function f w.r.t. the
    ith component of v."""
    # add h to just the ith element of v
    w = [v_j + (h if j == i else 0) for j, v_j in enumerate(v)]
    return (f(points, w[0], w[1]) - f(points, v[0], v[1])) / h
def estimate_gradient(points, current_m, current_b):
    """Estimate the partial derivatives w.r.t. m and b using forward
    difference quotients."""
    v = [current_m, current_b]
    partial_wrt_m = partial_difference_quotient(total_squared_error, points, v, i=0, h=0.0001)
    partial_wrt_b = partial_difference_quotient(total_squared_error, points, v, i=1, h=0.0001)
    return partial_wrt_m, partial_wrt_b
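
# For comparison, the mean squared error also has a closed-form gradient; a
# sketch of an analytic alternative to the numerical estimate above, assuming
# `points` is an (n, 2) numpy array. The name `analytic_gradient` is ours.
def analytic_gradient(points, m, b):
    """d(MSE)/dm and d(MSE)/db for the line y = mx + b."""
    x, y = points[:, 0], points[:, 1]
    residual = y - (m * x + b)
    partial_wrt_m = -2 * np.mean(x * residual)
    partial_wrt_b = -2 * np.mean(residual)
    return partial_wrt_m, partial_wrt_b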
def step(points, current_m, current_b, learningRate):
    """Take one step 'downhill' in the gradient descent process."""
    # estimate gradient w.r.t. m and w.r.t. b
    partial_wrt_m, partial_wrt_b = estimate_gradient(points, current_m, current_b)
    # update m and b
    next_m = current_m - (learningRate * partial_wrt_m)
    next_b = current_b - (learningRate * partial_wrt_b)
    # record the change in m and b for the tolerance check
    t_val = [next_m - current_m, next_b - current_b]
    return next_m, next_b, t_val
def gradient_descent(points, start_m, start_b, learningRate, numIterations, tolerance):
    """Perform gradient descent for the specified number of iterations, or
    stop early once the step sizes in m and b fall below the given
    tolerance."""
    # initialize search
    next_m = start_m
    next_b = start_b
    # keep track of the m and b steps for each iteration
    m_steps = []
    b_steps = []
    total_SE = []
    numIters = 0
    for i in range(numIterations):
        next_m, next_b, t_val = step(points, next_m, next_b, learningRate)
        # keep stepping while the changes in both m and b exceed the tolerance
        if abs(t_val[0]) >= tolerance and abs(t_val[1]) >= tolerance:
            m_steps.append(next_m)
            b_steps.append(next_b)
            total_SE.append(total_squared_error(points, next_m, next_b))  # track error
            numIters = i + 1
        else:
            break
    return next_m, next_b, m_steps, b_steps, total_SE, numIters
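
# A minimal usage sketch for gradient_descent on synthetic data; the points,
# learning rate, and iteration/tolerance values below are illustrative
# assumptions, not values from the original script.
def demo_gradient_descent():
    pts = np.array([[0, 1.0], [1, 3.1], [2, 4.9], [3, 7.2]])
    m, b, m_steps, b_steps, SE, numIters = gradient_descent(
        pts, start_m=0, start_b=0, learningRate=0.01,
        numIterations=5000, tolerance=1e-6)
    # for this roughly linear data, m should approach ~2 and b ~1
    print('demo: m = {:.2f}, b = {:.2f} after {} iterations'.format(m, b, numIters))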
def create_line(m, b, x):
    """Create a line with slope m and y-intercept b over the x data points."""
    return m * x + b
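
# run() expects a 'sampleData.npy' file of (x, y) rows. If that file is not
# available, a helper along these lines could generate comparable data; the
# slope, intercept, and noise level here are our assumptions.
def make_sample_data(n=50, m=2.0, b=5.0, noise=3.0, seed=0):
    """Return an (n, 2) array of noisy points around the line y = mx + b."""
    rng = np.random.RandomState(seed)
    x = rng.uniform(0, 20, n)
    y = m * x + b + rng.normal(0, noise, n)
    return np.column_stack((x, y))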
def run():
    data = np.load('sampleData.npy')
    x = [row[0] for row in data]
    y = [row[1] for row in data]
    m, b, m_steps, b_steps, SE, numIters = gradient_descent(
        data, start_m=0, start_b=0, learningRate=0.001,
        numIterations=1000, tolerance=0.0001)
    print('m = {:.2f}, b = {:.2f}'.format(m, b))
    # plot sample data points and line of best fit
    plt.figure(1)
    x_line = range(int(round(min(x))) - 1, int(round(max(x))) + 1)
    line = [create_line(m, b, i) for i in x_line]
    plt.plot(x_line, line, label='y = {:.2f}x + {:.2f}'.format(m, b))
    plt.scatter(x, y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    # m and b step progression
    fig = plt.figure(2)
    plt.subplot(311)
    plt.plot(m_steps, b_steps)
    plt.xlabel('m')
    plt.ylabel('b')
    fig.subplots_adjust(bottom=0.2)
    # lines created from the m and b steps throughout the descent
    plt.subplot(312)
    plt.scatter(x, y)
    for i, j in zip(m_steps, b_steps):
        line = [create_line(i, j, k) for k in x_line]
        plt.plot(x_line, line)
    plt.xlabel('x')
    plt.ylabel('y')
    # total squared error vs. number of iterations
    plt.subplot(313)
    plt.plot(SE)
    plt.ylabel('Total Squared Error')
    plt.xlabel('Number of Iterations')
    # minimum error found on the (m, b) error surface
    fig = plt.figure(3)
    ax = fig.add_subplot(projection='3d')
    m_plot = np.arange(m - 10, m + 10, 0.5)
    b_plot = np.arange(b - 10, b + 10, 0.5)
    m_plot, b_plot = np.meshgrid(m_plot, b_plot)
    SE_surface = np.array(total_squared_error(data, m_plot, b_plot))
    # set axis limits
    ax.set_zlim(np.min(SE_surface), np.max(SE_surface))
    ax.set_xlim(np.min(m_plot), np.max(m_plot))
    ax.set_ylim(np.min(b_plot), np.max(b_plot))
    # set axis labels
    ax.set_xlabel('m (slope)')
    ax.set_ylabel('b (y-intercept)')
    ax.set_zlabel('SE (Total Squared Error)')
    ax.plot_wireframe(m_plot, b_plot, SE_surface, color='grey')
    ax.scatter(m, b, total_squared_error(data, m, b), s=200, c='r', marker='*')
    ax.text(m + 0.25, b + 0.25, total_squared_error(data, m, b),
            '(m = {:.2f}, b = {:.2f})'.format(m, b), size=12,
            bbox=dict(facecolor='yellow', alpha=0.5))
    plt.show()


if __name__ == '__main__':
    run()