-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathtorch_test.py
More file actions
142 lines (117 loc) · 4.88 KB
/
torch_test.py
File metadata and controls
142 lines (117 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# coding: utf-8
import torch
import torch.nn.functional as F
import torch.nn as nn
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import torch.optim as optim
import logger as L
from torch.utils.data.dataset import Dataset
logger = L.create_logger('NN_Visualiser')
import numpy as np
def plot_bar_grad_flow(named_parameters):
    '''Draw the per-layer gradient magnitudes as two overlaid bar charts.

    For every trainable, non-bias parameter that currently has a gradient,
    the mean and the maximum of the absolute gradient are computed and
    plotted on the current matplotlib figure (max in cyan, mean in blue).
    Useful for spotting vanishing / exploding gradients.

    Usage: call right after ``loss.backward()``, e.g.
    ``plot_bar_grad_flow(model.named_parameters())``.

    Args:
        named_parameters: iterable of ``(name, parameter)`` pairs, as
            returned by ``nn.Module.named_parameters()``.
    '''
    ave_grads = []
    max_grads = []
    layers = []
    for n, p in named_parameters:
        if not p.requires_grad or "bias" in n:
            continue
        if p.grad is None:
            # Parameter did not take part in the last backward pass (or
            # backward has not run yet); there is nothing to plot for it.
            continue
        grad_abs = p.grad.abs()
        layers.append(n)
        # .item() yields plain Python floats, so matplotlib never has to
        # convert tensors itself (which fails for non-CPU tensors).
        ave_grads.append(grad_abs.mean().item())
        max_grads.append(grad_abs.max().item())

    positions = np.arange(len(max_grads))
    plt.bar(positions, max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(positions, ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, lw=2, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.ylim(bottom=-0.001, top=0.02)  # zoom in on the lower gradient regions
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
def plot_grad_flow(named_parameters):
    '''Plot the mean absolute gradient of each layer as a line.

    Only trainable, non-bias parameters that currently have a gradient are
    included. The line is added to the current matplotlib figure, so calling
    this repeatedly overlays one semi-transparent line per call.

    Usage: call right after ``loss.backward()``, e.g.
    ``plot_grad_flow(model.named_parameters())``.

    Args:
        named_parameters: iterable of ``(name, parameter)`` pairs, as
            returned by ``nn.Module.named_parameters()``.
    '''
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if not p.requires_grad or "bias" in n:
            continue
        if p.grad is None:
            # No gradient has been populated for this parameter; skip it
            # instead of failing on ``None.abs()``.
            continue
        layers.append(n)
        # Store Python floats rather than tensors so matplotlib does not
        # have to convert them (which fails for non-CPU tensors).
        ave_grads.append(p.grad.abs().mean().item())

    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    # left/right are the primary names of the limits and match the keyword
    # usage in plot_bar_grad_flow.
    plt.xlim(left=0, right=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
class ModularPolicyDataset(Dataset):
    '''Dataset that serves (joint_pos, target_pos, action) triples.

    The three containers are stored as given and indexed in parallel; the
    dataset length is taken from ``joint_pos``.
    '''

    def __init__(self, joint_pos, target_pos, actions):
        self.joint_pos, self.target_pos, self.actions = (
            joint_pos,
            target_pos,
            actions,
        )

    def __len__(self):
        # Number of samples, as reported by the joint-position container.
        return len(self.joint_pos)

    def __getitem__(self, index):
        # Look up the same index in each container, in a fixed order.
        joints = self.joint_pos[index]
        target = self.target_pos[index]
        action = self.actions[index]
        return (joints, target, action)
class FullyConnectedPolicyEstimator(nn.Module):
    '''Small fully connected network: Linear -> ReLU -> Linear.

    Args:
        num_states: size of the input feature vector.
        num_actions: size of the output vector.
        hidden_dim: width of the single hidden layer. Defaults to 200, the
            value that was previously hard-coded.
    '''

    def __init__(self, num_states, num_actions, hidden_dim=200):
        super(FullyConnectedPolicyEstimator, self).__init__()
        # Attribute names (fc1, fc3) are kept so existing state dicts
        # still load.
        self.fc1 = nn.Linear(num_states, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_actions)

    def forward(self, x):
        '''Map input ``x`` (last dim ``num_states``) to ``num_actions`` outputs.

        No activation is applied to the output layer.
        '''
        hidden = F.relu(self.fc1(x))
        return self.fc3(hidden)
class ModularPolicyEstimator(nn.Module):
    '''Policy network built from two sub-networks plus a joining head.

    Joint positions and target positions are each passed through their own
    ``FullyConnectedPolicyEstimator``; the two outputs are concatenated and
    fed through Linear -> ReLU -> Linear to produce the action vector.

    Args:
        action_dims: size of the output action vector.
        joint_dims: size of the joint-position input (previously fixed at 7).
        target_dims: size of the target-position input (previously fixed
            at 3).
        modular_policy_op_dims: output size of each sub-network (previously
            fixed at 10).
        hidden_dims: hidden width of the joining head (previously fixed
            at 200).
    '''

    def __init__(self, action_dims=8, joint_dims=7, target_dims=3,
                 modular_policy_op_dims=10, hidden_dims=200):
        super(ModularPolicyEstimator, self).__init__()
        # One sub-network per input modality.
        self.joint_pos_policy = FullyConnectedPolicyEstimator(
            joint_dims, modular_policy_op_dims)
        self.target_pos_policy = FullyConnectedPolicyEstimator(
            target_dims, modular_policy_op_dims)
        # Joining head. Its input width is derived from the sub-network
        # output size so the two can never drift apart.
        self.fc1 = nn.Linear(2 * modular_policy_op_dims, hidden_dims)
        self.fc3 = nn.Linear(hidden_dims, action_dims)

    def forward(self, joint_pos, target_pos):
        '''Return the predicted action for the given joint/target positions.

        The two inputs must agree on every dimension except the last one.
        '''
        joint_pos_op = self.joint_pos_policy(joint_pos)
        target_pos_op = self.target_pos_policy(target_pos)
        # Concatenate along the feature (last) dimension; for the usual
        # (batch_size, features) input this is the same as dim=1.
        stacked_tensor = torch.cat((joint_pos_op, target_pos_op), dim=-1)
        op = F.relu(self.fc1(stacked_tensor))
        op = self.fc3(op)
        return op
# Smoke test for ModularPolicyEstimator on random data: 1000 samples with
# 7 joint-position features, 3 target-position features and 8 action values.
x1 = torch.rand(1000, 7)
x2 = torch.rand(1000, 3)
y = torch.rand(1000, 8)

batch_size = 64
epochs = 100
learning_rate = 0.01

dataset = ModularPolicyDataset(x1, x2, y)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

neural_network = ModularPolicyEstimator()
optimizer = optim.SGD(neural_network.parameters(), lr=learning_rate, momentum=0.9)
loss_function = nn.SmoothL1Loss()

neural_network.train()
final_loss = []  # per-epoch sum of (batch loss * batch size)
for epoch in range(epochs):
    running_loss = 0.0
    for jointpos, targetpos, output in data_loader:
        # The loader already yields tensors; .float() only guards the dtype.
        optimizer.zero_grad()
        network_pred = neural_network(jointpos.float(), targetpos.float())
        loss = loss_function(network_pred, output.float())
        loss.backward()
        # Must run after backward() and before the next zero_grad() so the
        # gradients are populated; overlays one line per batch.
        plot_grad_flow(neural_network.named_parameters())
        optimizer.step()
        # Weight the batch loss by the batch size so a smaller final batch
        # does not distort the epoch total.
        running_loss += loss.item() * jointpos.size(0)
    # running_loss is a sample-weighted sum, so the epoch average is obtained
    # by dividing by the number of samples, not by the number of batches.
    logger.info('[%d] loss: %.6f' % (epoch + 1, running_loss / len(dataset)))
    final_loss.append(float(running_loss))
plt.show()