-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgd_methods.py
More file actions
109 lines (88 loc) · 4.26 KB
/
gd_methods.py
File metadata and controls
109 lines (88 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from scipy.misc import derivative as dx
import sympy as sym
import numpy as np
import math
'''
This script contains the various gradient-descent methods
that are ready for matplotlib visualization
'''
# Hyper-parameters read by adam() further down in this module.
# Decay rate of the running average of gradients (v_t).
beta1 = 0.9
# Decay rate of the running average of squared gradients (s_t).
beta2 = 0.999
# Added to sqrt(s_t_hat) in the Adam step so the denominator stays non-zero
# when the squared-gradient average is 0.
epsilon = 1e-10
def fx(val, x, y, z):
    """Evaluate the cubic model ``x*val**3 + y*val**2 + z*val``.

    Note the naming: ``val`` is the point at which the polynomial is
    evaluated, while ``x``, ``y`` and ``z`` are the cubic, quadratic and
    linear coefficients. There is no constant term.
    """
    cubic_term = x * val**3
    quadratic_term = y * val**2
    linear_term = z * val
    # Summed left to right, in the same order as a single-expression form.
    return cubic_term + quadratic_term + linear_term
def fx_sym(val, x, y, z):
    """Build the cubic ``x*val**3 + y*val**2 + z*val`` from its arguments.

    Same formula as ``fx``. calc_gradient in this module calls it with sympy
    symbols for ``x``, ``y`` and ``z`` so the result can be differentiated
    with respect to them; ``val`` is the sample point.
    """
    total = x * val**3
    total = total + y * val**2
    total = total + z * val
    return total
def cost_func(x, y, theta0, theta1, theta2, theta0_true, theta1_true, theta2_true, show='theta0_1'):
    """Mean squared error of the cubic model on a 2-parameter slice.

    Two of the three coefficients are taken from ``theta0``/``theta1``/
    ``theta2``; the third is pinned to its ``*_true`` value, and the
    corresponding ``theta*`` argument is ignored. ``show`` picks the slice:

    * ``'theta0_1'`` -- vary theta0 and theta1, pin theta2 to ``theta2_true``
    * ``'theta1_2'`` -- vary theta1 and theta2, pin theta0 to ``theta0_true``
    * ``'theta0_2'`` -- vary theta0 and theta2, pin theta1 to ``theta1_true``

    Args:
        x: Sample points passed to ``fx`` as ``val``.
            (Presumably a 1-D array so it broadcasts along the last axis of
            the promoted thetas -- confirm against callers.)
        y: Target values compared against the model output.
        theta0, theta1, theta2: Candidate coefficients; scalars or arrays.
            Each is promoted with ``np.atleast_3d``.
        theta0_true, theta1_true, theta2_true: Values used for whichever
            coefficient the chosen slice holds fixed.
        show: Name of the slice, one of the three strings listed above.

    Returns:
        ``np.average`` of the squared residuals taken over axis 2.

    Raises:
        ValueError: If ``show`` is not one of the supported slice names.
            (Previously the function fell through and returned ``None``.)
    """
    theta0 = np.atleast_3d(np.asarray(theta0))
    theta1 = np.atleast_3d(np.asarray(theta1))
    theta2 = np.atleast_3d(np.asarray(theta2))

    if show == 'theta0_1':
        prediction = fx(x, theta0, theta1, theta2_true)
    elif show == 'theta1_2':
        prediction = fx(x, theta0_true, theta1, theta2)
    elif show == 'theta0_2':
        prediction = fx(x, theta0, theta1_true, theta2)
    else:
        raise ValueError(
            "show must be 'theta0_1', 'theta1_2' or 'theta0_2', got %r" % (show,)
        )
    return np.average((y - prediction)**2, axis=2)
def calc_gradient(x_vals, theta0_val, theta1_val, theta2_val, y_vals):
    """Return the mean gradient of the squared error w.r.t. each coefficient.

    The per-sample cost is ``(theta0*v**3 + theta1*v**2 + theta2*v - t)**2``
    for sample point ``v`` and target ``t``. Its partial derivatives are
    ``2 * residual * v**3``, ``2 * residual * v**2`` and ``2 * residual * v``.
    They are evaluated in closed form with NumPy instead of building and
    differentiating a symbolic expression for every sample on every call.

    The derivatives are tied to the cubic model defined by ``fx`` /
    ``fx_sym``; if that model changes, this function must change with it.

    Args:
        x_vals: Sample points (1-D sequence or array).
        theta0_val: Current cubic coefficient.
        theta1_val: Current quadratic coefficient.
        theta2_val: Current linear coefficient.
        y_vals: Targets, one per entry of ``x_vals``.

    Returns:
        Tuple ``(grad_theta0, grad_theta1, grad_theta2)`` of Python floats,
        each averaged over all samples.

    Raises:
        ValueError: If ``x_vals`` is empty or ``x_vals`` and ``y_vals`` do
            not have the same shape.
    """
    samples = np.asarray(x_vals, dtype=float)
    targets = np.asarray(y_vals, dtype=float)
    if samples.size == 0:
        raise ValueError("x_vals must contain at least one sample")
    if samples.shape != targets.shape:
        raise ValueError(
            "x_vals and y_vals must have the same shape, got %r and %r"
            % (samples.shape, targets.shape)
        )

    cubed = samples**3
    squared = samples**2
    residual = (theta0_val * cubed + theta1_val * squared + theta2_val * samples) - targets
    # d/dtheta (residual**2) = 2 * residual * d(residual)/dtheta
    doubled_residual = 2.0 * residual

    grad_theta0 = float(np.mean(doubled_residual * cubed))
    grad_theta1 = float(np.mean(doubled_residual * squared))
    grad_theta2 = float(np.mean(doubled_residual * samples))
    return grad_theta0, grad_theta1, grad_theta2
def gradient_descent(N, lr, x_vals, y_vals, theta0_true, theta1_true, theta2_true, show='theta0_1'):
    """Run plain gradient descent from (0, 0, 0) and record the path.

    Args:
        N: Total number of recorded points, including the starting point;
            ``N - 1`` update steps are taken.
        lr: Learning rate multiplying each gradient component.
        x_vals: Sample points handed to ``calc_gradient`` and ``cost_func``.
        y_vals: Targets handed to ``calc_gradient`` and ``cost_func``.
        theta0_true, theta1_true, theta2_true: Forwarded to ``cost_func``,
            which uses one of them in place of the pinned coefficient.
        show: Slice name forwarded to ``cost_func``. It used to be accepted
            here but never passed on, so the recorded costs always used the
            ``'theta0_1'`` slice.

    Returns:
        ``(theta, J)``: ``theta`` is the list of parameter arrays visited and
        ``J`` the list of matching costs. The first cost is
        ``cost_func(...)[0]``; later entries are the unindexed return value,
        as before.
    """
    theta = [np.array((0, 0, 0))]
    J = [cost_func(x_vals, y_vals, *theta[0],
                   theta0_true, theta1_true, theta2_true, show=show)[0]]
    for _ in range(N - 1):
        last_theta = theta[-1]
        grad_theta0, grad_theta1, grad_theta2 = calc_gradient(
            x_vals, last_theta[0], last_theta[1], last_theta[2], y_vals)
        # Progress trace of the raw gradient, kept from the original code.
        print([grad_theta0, grad_theta1, grad_theta2])
        # dtype=float coerces whatever numeric type calc_gradient returned
        # into an ordinary float64 parameter vector.
        this_theta = np.array([
            last_theta[0] - (lr * grad_theta0),
            last_theta[1] - (lr * grad_theta1),
            last_theta[2] - (lr * grad_theta2),
        ], dtype=float)
        theta.append(this_theta)
        J.append(cost_func(x_vals, y_vals, *this_theta,
                           theta0_true, theta1_true, theta2_true, show=show))
    return theta, J
def adam(N, lr, x_vals, y_vals, theta0_true, theta1_true, theta2_true, show='theta0_1'):
    """Run Adam from (0, 0, 0) and record the path.

    Uses the module-level ``beta1``, ``beta2`` and ``epsilon``. Both moment
    estimates are bias-corrected with ``1 - beta**i``, where ``i`` is the
    1-based step number.

    Args:
        N: Total number of recorded points, including the starting point;
            ``N - 1`` update steps are taken.
        lr: Step size multiplying the normalised update.
        x_vals: Sample points handed to ``calc_gradient`` and ``cost_func``.
        y_vals: Targets handed to ``calc_gradient`` and ``cost_func``.
        theta0_true, theta1_true, theta2_true: Forwarded to ``cost_func``,
            which uses one of them in place of the pinned coefficient.
        show: Slice name forwarded to ``cost_func``. It used to be accepted
            here but never passed on, so the recorded costs always used the
            ``'theta0_1'`` slice.

    Returns:
        ``(theta, J)``: ``theta`` is the list of parameter arrays visited and
        ``J`` the list of matching costs. The first cost is
        ``cost_func(...)[0]``; later entries are the unindexed return value,
        as before.
    """
    theta = [np.array((0, 0, 0))]
    J = [cost_func(x_vals, y_vals, *theta[0],
                   theta0_true, theta1_true, theta2_true, show=show)[0]]
    v_t = 0  # running average of gradients
    s_t = 0  # running average of squared gradients
    # i starts at 1 so the bias-correction terms 1 - beta**i are non-zero.
    for i in range(1, N):
        last_theta = theta[-1]
        grad_theta0, grad_theta1, grad_theta2 = calc_gradient(
            x_vals, last_theta[0], last_theta[1], last_theta[2], y_vals)
        grad_arr = np.array([grad_theta0, grad_theta1, grad_theta2])

        v_t = beta1 * v_t + (1 - beta1) * grad_arr
        s_t = beta2 * s_t + (1 - beta2) * np.power(grad_arr, 2)
        # Progress trace of the raw moment estimates, kept from the original.
        print(v_t, s_t)

        # The explicit dtype also turns the values into a plain numeric
        # array regardless of the element type calc_gradient returned.
        v_t_hat = np.array(v_t / (1 - beta1**i), dtype=np.float32)
        s_t_hat = np.array(s_t / (1 - beta2**i), dtype=np.float32)
        update_step = lr * v_t_hat / (np.sqrt(s_t_hat) + epsilon)

        # One array subtraction instead of three element-wise assignments;
        # the result is a float64 vector of length 3.
        this_theta = np.asarray(last_theta, dtype=float) - update_step
        theta.append(this_theta)
        J.append(cost_func(x_vals, y_vals, *this_theta,
                           theta0_true, theta1_true, theta2_true, show=show))
    return theta, J