-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcross_validation.py
More file actions
165 lines (129 loc) · 6.03 KB
/
cross_validation.py
File metadata and controls
165 lines (129 loc) · 6.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import torch
from metrics import *
import time
import torch.nn as nn
import torch.optim as optim
from losses import *
import numpy as np
# from torch_lr_finder import LRFinder
def train(net, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs, file_name, device, tolerance = 1e-5, use_scheduler = True):
    """Train ``net`` for up to ``num_epochs`` epochs, validating after each epoch.

    Args:
        net: model exposing a ``metrics`` record object (epochs, per-epoch lists,
            ``Print_Infos`` / ``Plot_Infos``) used to track training history.
        train_loader / validation_loader: DataLoaders yielding ``(imgs, gt)`` batches.
        criterion: loss function applied to ``(output, gt)``.
        optimizer: torch optimizer driving the backward pass.
        scheduler: learning-rate scheduler, stepped once per epoch when
            ``use_scheduler`` is True.
        num_epochs: maximum number of epochs to run.
        file_name: checkpoint path; ``{'metrics', 'state'}`` is saved every epoch.
        device: device the model and batches are moved to.
        tolerance: early-stop threshold on the absolute change in training loss.
        use_scheduler: whether to step ``scheduler`` after each epoch.
    """
    print('Starting ', file_name)
    # If the network has not been trained yet, save an initial checkpoint now.
    if net.metrics.epochs == 0:
        torch.save({'metrics': net.metrics, 'state': net.state_dict()}, file_name)
    net.metrics.Print_Infos()
    # Make sure the network is on the right device
    net.to(device)
    LRS = []
    for epochs in range(num_epochs):
        LR = optimizer.param_groups[0]['lr']  # current learning rate, for display
        print('Epoch {} , LR {}: |'.format(net.metrics.epochs + 1, LR), end='')
        LRS.append(LR)
        # Per-epoch accumulators. Losses are accumulated as Python floats via
        # .item() below — accumulating the raw loss tensor would keep the whole
        # autograd graph alive for the entire epoch (memory leak).
        train_loss = 0.0
        train_accuracy = 0
        train_F1_score = 0
        train_confusion_matrix = torch.tensor([[0, 0], [0, 0]])
        validation_loss = 0.0
        validation_accuracy = 0
        validation_F1_score = 0
        validation_confusion_matrix = torch.tensor([[0, 0], [0, 0]])
        # For time monitoring
        start = time.time()
        net.train()  # model in training mode
        for i, (imgs, gt) in enumerate(train_loader):
            # Send to the compute device
            imgs = imgs.to(device)
            gt = gt.to(device)
            # Forward pass
            output = net(imgs)
            loss = criterion(output, gt)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Update metrics (.item() detaches the scalar from the graph)
            train_loss += loss.item()
            train_accuracy += compute_accuracy(output, gt)
            train_F1_score += compute_F1_score(output, gt)
            train_confusion_matrix += compute_confusion_matrix(output, gt)
            if i % 5 == 0:
                print('-', end='')  # lightweight progress bar
        net.eval()
        with torch.no_grad():
            for i, (imgs, gt) in enumerate(validation_loader):
                # Send to the compute device
                imgs = imgs.to(device)
                gt = gt.to(device)
                # Compute prediction
                output = net(imgs)
                # Update the minibatch metrics
                validation_loss += criterion(output, gt).item()
                validation_accuracy += compute_accuracy(output, gt)
                validation_F1_score += compute_F1_score(output, gt)
                validation_confusion_matrix += compute_confusion_matrix(output, gt)
                if i % 5 == 0:
                    print('-', end='')
        print('|', end='')
        # End time and display duration of this epoch
        end = time.time()
        print('------Epoch over in {:.3f} seconds.'.format(end - start))
        # Append the epoch's averages to the network's history
        # (divide by the number of minibatches in each dataloader)
        net.metrics.train_losses.append(train_loss / len(train_loader))
        net.metrics.train_accuracies.append(train_accuracy / len(train_loader))
        net.metrics.train_confusion_matrices.append(train_confusion_matrix / len(train_loader))
        net.metrics.train_F1_scores.append(train_F1_score / len(train_loader))
        net.metrics.validation_losses.append(validation_loss / len(validation_loader))
        net.metrics.validation_accuracies.append(validation_accuracy / len(validation_loader))
        net.metrics.validation_confusion_matrices.append(validation_confusion_matrix / len(validation_loader))
        net.metrics.validation_F1_scores.append(validation_F1_score / len(validation_loader))
        net.metrics.epochs = net.metrics.epochs + 1
        net.metrics.epoch_times.append(end - start)
        # Print the current results
        net.metrics.Print_Infos()
        # Update the learning rate with the scheduler
        if use_scheduler:
            # ReduceLROnPlateau needs the monitored metric; other schedulers
            # take no argument. isinstance avoids constructing a throwaway
            # scheduler just to compare types.
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(validation_loss / len(validation_loader))
            else:
                scheduler.step()
        # Save the network so that nothing is lost if the session closes
        torch.save({'metrics': net.metrics, 'state': net.state_dict()}, file_name)
        # Early stopping: if the training loss stopped evolving, break
        if net.metrics.epochs > 10:
            if abs(net.metrics.train_losses[-1] - net.metrics.train_losses[-2]) < tolerance:
                print('Training terminated: it converged.\n')
                break
    # Plot the evolution of the network's metrics
    net.metrics.Plot_Infos()
def test(net, test_loader, criterion, device):
    """Evaluate ``net`` on the test set and append the results to ``net.metrics``.

    Args:
        net: model exposing a ``metrics`` record object with test-metric lists.
        test_loader: DataLoader yielding ``(imgs, gt)`` batches.
        criterion: loss function applied to ``(output, gt)``.
        device: device the model inputs are moved to.
    """
    # Loss accumulated as a Python float via .item() to avoid keeping
    # per-batch tensors alive on the device.
    test_loss = 0.0
    test_accuracy = 0
    test_F1_score = 0
    test_confusion_matrix = torch.tensor([[0, 0], [0, 0]])
    net.eval()  # evaluation mode: no more dropout
    with torch.no_grad():
        for i, (imgs, gt) in enumerate(test_loader):
            imgs = imgs.to(device)
            gt = gt.to(device)
            # Compute prediction
            output = net(imgs)
            # Update the minibatch metrics
            test_loss += criterion(output, gt).item()
            test_accuracy += compute_accuracy(output, gt)
            test_F1_score += compute_F1_score(output, gt)
            test_confusion_matrix += compute_confusion_matrix(output, gt)
    # Store the averages over the number of minibatches
    net.metrics.test_losses.append(test_loss / len(test_loader))
    net.metrics.test_accuracies.append(test_accuracy / len(test_loader))
    net.metrics.test_confusion_matrices.append(test_confusion_matrix / len(test_loader))
    net.metrics.test_F1_scores.append(test_F1_score / len(test_loader))
    print('F1 score: {}'.format(net.metrics.test_F1_scores[-1]))
# This section is commented out: it requires a library that is not necessary for the submissions
#def find_best_lr(net, criterion, optimizer, train_loader, start_lr, end_lr):
# lr_finder = LRFinder(net, optimizer, criterion, device="cuda")
# lr_finder.range_test(train_loader, end_lr=end_lr, num_iter=400)
# lr_finder.plot() # to inspect the loss-learning rate graph
# lr_finder.reset() # to reset the model and optimizer to their initial state