optimization.py
# Author: Mikita Sazanovich
import tensorflow as tf


# https://github.com/google-research/bert/blob/master/optimization.py
class BERTSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, optimizer_hyperparameters):
    super().__init__()
    self.num_train_steps = optimizer_hyperparameters['iterations']
    self.num_warmup_steps = optimizer_hyperparameters['warmup_iterations']
    self.init_lr = float(optimizer_hyperparameters['init_learning_rate'])

  def __call__(self, step):
    learning_rate = tf.constant(value=self.init_lr, shape=[], dtype=tf.float32)
    # Implements linear decay of the learning rate.
    learning_rate = tf.compat.v1.train.polynomial_decay(
        learning_rate,
        step,
        self.num_train_steps,
        end_learning_rate=0.0,
        power=1.0,
        cycle=False)
    # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
    # learning rate will be `global_step/num_warmup_steps * init_lr`.
    if self.num_warmup_steps:
      step_int = tf.cast(step, tf.int32)
      warmup_steps_int = tf.constant(self.num_warmup_steps, dtype=tf.int32)
      step_float = tf.cast(step_int, tf.float32)
      warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
      warmup_percent_done = step_float / warmup_steps_float
      warmup_learning_rate = self.init_lr * warmup_percent_done
      is_warmup = tf.cast(step_int < warmup_steps_int, tf.float32)
      learning_rate = (
          (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)
    return learning_rate


# Inverse square root schedule with warmup, as described in the T5 paper:
# https://arxiv.org/pdf/1910.10683.pdf
# lr(step) = 1 / sqrt(max(step, warmup_iterations)).
class T5Schedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, optimizer_hyperparameters):
    super().__init__()
    self.num_warmup_steps = optimizer_hyperparameters['warmup_iterations']

  def __call__(self, step):
    # `step` may arrive as an integer tensor depending on the optimizer
    # implementation, so cast it before the float-only ops below.
    step_float = tf.cast(step, tf.float32)
    warmup_steps_float = tf.constant(self.num_warmup_steps, dtype=tf.float32)
    step_or_warmup = tf.maximum(step_float, warmup_steps_float)
    lr = tf.math.rsqrt(step_or_warmup)
    return lr


class ConstantSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, optimizer_hyperparameters):
    super().__init__()
    self.lr = float(optimizer_hyperparameters['lr'])

  def __call__(self, step):
    learning_rate = tf.constant(value=self.lr, shape=[], dtype=tf.float32)
    return learning_rate


def create_optimizer_from_params(params):
  """Builds a Keras optimizer from a flat hyperparameter dict.

  `params` must contain 'lr_schedule_class' (one of the schedule class names
  defined in this module), 'method' ('adam' or 'sgd'), and whatever keys the
  chosen schedule and optimizer read.
  """
  lr_schedule_class = globals()[params['lr_schedule_class']]
  lr_schedule = lr_schedule_class(params)
  method = params['method']
  if method == 'adam':
    adam_params = {
        'learning_rate': lr_schedule,
        'beta_1': float(params['beta_1']),
        'beta_2': float(params['beta_2']),
    }
    optimizer = tf.keras.optimizers.Adam(**adam_params)
  elif method == 'sgd':
    sgd_params = {
        'learning_rate': lr_schedule,
    }
    optimizer = tf.keras.optimizers.SGD(**sgd_params)
  else:
    raise ValueError(f'Unknown optimizer method: {method}. Supported options are adam and sgd.')
  return optimizer
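

# Illustrative usage sketch. The hyperparameter values below are assumptions
# chosen only to demonstrate the dictionary keys read by the classes above;
# they are not taken from this repository's training configs.
if __name__ == '__main__':
  bert_params = {
      'lr_schedule_class': 'BERTSchedule',
      'iterations': 100000,
      'warmup_iterations': 10000,
      'init_learning_rate': '1e-4',
      'method': 'adam',
      'beta_1': '0.9',
      'beta_2': '0.999',
  }
  adam_optimizer = create_optimizer_from_params(bert_params)

  constant_params = {
      'lr_schedule_class': 'ConstantSchedule',
      'lr': '0.001',
      'method': 'sgd',
  }
  sgd_optimizer = create_optimizer_from_params(constant_params)

  # ConstantSchedule can be evaluated directly at any step.
  print(type(adam_optimizer).__name__,
        type(sgd_optimizer).__name__,
        float(ConstantSchedule(constant_params)(0)))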