TFSeq2SeqExample/train.py at master · azyner/TFSeq2SeqExample · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297

from seq2seq_model import Seq2SeqModel
import tensorflow as tf
import numpy as np
import data_utils

import math
import os
import random
import sys
import time


# Command-line flags. Each DEFINE_* call registers (name, default, help text).

# Optimisation hyper-parameters.
tf.app.flags.DEFINE_float("learning_rate", 0.5, "Learning rate.")
tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.7,
                          "Learning rate decays by this much.")
tf.app.flags.DEFINE_float("max_gradient_norm", 5.0,
                          "Clip gradients to this norm.")
tf.app.flags.DEFINE_integer("batch_size", 64,
                            "Batch size to use during training.")

# Network size.
tf.app.flags.DEFINE_integer("rnn_size", 32, "Size of each model layer.")
tf.app.flags.DEFINE_integer("num_layers", 3, "Number of layers in the model.")

# Input/output directories.
tf.app.flags.DEFINE_string("data_dir", "data", "Data directory")
tf.app.flags.DEFINE_string("train_dir", "train", "Training directory.")
tf.app.flags.DEFINE_string("logs_dir", "logs", "Logs directory.")
tf.app.flags.DEFINE_string("plot_dir", "plot", "Output plots directory.")

# Run control.
tf.app.flags.DEFINE_integer("max_train_data_size", 0,
                            "Limit on the size of training data (0: no limit).")
tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200,
                            "How many training steps to do per checkpoint.")
tf.app.flags.DEFINE_boolean("predict", False,
                            "Set to True to use the model to generate a sequence prediction.")
tf.app.flags.DEFINE_boolean("run_many", False,
                            "Run a list of many jobs")
tf.app.flags.DEFINE_boolean("self_test", False,
                            "Run a self-test if this is set to True.")
tf.app.flags.DEFINE_boolean("use_fp16", False,
                            "Train using fp16 instead of fp32.")
tf.app.flags.DEFINE_boolean("gen_random_input_data", False,
                            "Generate data from function using varying random parameters (True) or a constant, single function")

# Sequence lengths: "observation" steps are fed to the model, "prediction"
# steps are generated by it.
tf.app.flags.DEFINE_integer("train_observation_steps", 30, "How many steps of data to feed the model during training.")
tf.app.flags.DEFINE_integer("train_prediction_steps", 40, "How many steps of data the model generates during training.")
# The help text here previously duplicated the prediction flag's description.
tf.app.flags.DEFINE_integer("test_observation_steps", 100, "How many steps of data to feed the model during testing.")
tf.app.flags.DEFINE_integer("test_prediction_steps", 500, "How many steps of data the model generates during testing.")

# Parsed flag values, read throughout this module.
FLAGS = tf.app.flags.FLAGS

def get_title_from_params():
    """Return a run identifier string that encodes the current FLAGS values.

    The result is 'S2S' followed by dash-separated '<tag><value>' fields for
    the sequence lengths, network size, batch size, learning-rate settings
    and the random-data switch ('randT' or 'randF'). It is used to name
    checkpoint, log and plot outputs.
    """
    tagged_values = [
        ('tre', FLAGS.train_observation_steps),
        ('trd', FLAGS.train_prediction_steps),
        ('tse', FLAGS.test_observation_steps),
        ('tsd', FLAGS.test_prediction_steps),
        ('rnn', FLAGS.rnn_size),
        ('nl', FLAGS.num_layers),
        ('bs', FLAGS.batch_size),
        ('lr', FLAGS.learning_rate),
        ('ld', FLAGS.learning_rate_decay_factor),
    ]
    fields = [tag + str(value) for tag, value in tagged_values]
    if FLAGS.gen_random_input_data:
        fields.append('randT')
    else:
        fields.append('randF')
    # No dash between the 'S2S' prefix and the first field.
    return 'S2S' + '-'.join(fields)

def gen_data(observation_steps, prediction_steps):
    """Generate observation/prediction sequence data from periodic functions.

    A set of function parameter tuples is built: 20 randomly parameterised
    ones when FLAGS.gen_random_input_data is set, plus one fixed tuple
    (0, 1, 0, 16) that is always included. The set is handed to
    data_utils.generate_data together with the waveform and the x-axis grid.

    Args:
        observation_steps: number of steps fed to the model per sequence.
        prediction_steps: number of steps the model has to generate.

    Returns:
        Whatever data_utils.generate_data returns; callers in this file
        unpack it as (past_sequence_data, future_sequence_data) and index
        each with 'train' / 'test'.
    """
    # Seed the generator so the randomly drawn function set is the same on
    # every call (e.g. for train() and the following predict()). Note that
    # assigning `random.seed = 42` would only replace the seeding function
    # with an int and leave the generator unseeded.
    random.seed(42)
    num_functions = 20  # number of different functions
    function_set = []
    if FLAGS.gen_random_input_data:
        # Function tuple is in order (a, b, c, d) for: a + b*fct(c + d*x)
        for _ in range(num_functions):
            function_set.append((
                random.choice(np.linspace(-0.5, 0.5, 10)),  # amplitude offset
                random.choice(np.linspace(0.1, 1.5, 10)),  # amplitude
                random.choice(np.linspace(0, 10, 10)),  # frequency offset
                random.choice(np.linspace(8, 32, 24)),  # frequency (/2pi)
            ))
    # The reference function is always part of the set.
    function_set.append((0, 1, 0, 16))

    import scipy.signal as spsig

    # Alternative waveforms; swap the assignment below to change the data.
    #fct = np.sin
    #fct = spsig.sawtooth
    fct = spsig.square

    def doublesin(t):
        return 0.9 * np.sin(t) + 0.1*np.sin(10*t)

    def doublesquare(t):
        return 0.5*spsig.square(t,duty=0.75) - 0.5*spsig.square(2*t,duty=0.25)

    # `seperate` (sic) is the keyword name this call has always used.
    return data_utils.generate_data(fct, np.linspace(0, 100, 10000), function_set,
                                    observation_steps, prediction_steps, seperate=False)

def create_model(session, feed_future_data, train_model, observation_steps, prediction_steps, batch_size,
                 rnn_size, num_layers, learning_rate, learning_rate_decay_factor, input_size, max_gradient_norm):
    """Construct a Seq2SeqModel and load or initialise its variables.

    All arguments after `session` are forwarded unchanged to Seq2SeqModel.
    The checkpoint directory is FLAGS.train_dir/<run title>; it is created if
    missing. If a checkpoint is recorded there and its file exists, the
    model's variables are restored from it in `session`; otherwise all
    variables are freshly initialised.

    Returns:
        The constructed model.
    """
    model = Seq2SeqModel(feed_future_data, train_model, observation_steps, prediction_steps, batch_size,
                         rnn_size, num_layers, learning_rate, learning_rate_decay_factor, input_size, max_gradient_norm)

    checkpoint_dir = os.path.join(FLAGS.train_dir, get_title_from_params())
    for directory in (FLAGS.train_dir, checkpoint_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
    if not (checkpoint_state and tf.gfile.Exists(checkpoint_state.model_checkpoint_path)):
        # Nothing to resume from: start with newly initialised variables.
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
        return model

    print("Reading model parameters from %s" % checkpoint_state.model_checkpoint_path)
    model.saver.restore(session, checkpoint_state.model_checkpoint_path)
    return model

def train():
    """Train the sequence-to-sequence model on generated function data.

    Builds (or restores) a model for the current FLAGS, generates the data
    with gen_data, and then loops over batches from the 'train' split.
    Every 20 steps the batch is additionally run without training so that
    summaries are written to the log directory. Every
    FLAGS.steps_per_checkpoint steps the averaged loss is printed, the
    learning rate is decayed if the loss has not improved over the last
    three checkpoints, and a checkpoint is saved. Training ends when the
    averaged loss falls below 0.02 or the learning rate falls below 0.01.
    """
    tf.reset_default_graph()
    # Parenthesised single-argument form, consistent with the other print calls.
    print("Start training for: " + get_title_from_params())

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.rnn_size))

        # Allowed to be varied between training and decoding.
        past_steps = FLAGS.train_observation_steps
        future_steps = FLAGS.train_prediction_steps

        # Set for training.
        train_model = True
        feed_future_data = False
        input_size = 1  # TODO fix this with sizing the input

        model = create_model(sess, feed_future_data, train_model, past_steps, future_steps, FLAGS.batch_size,
                             FLAGS.rnn_size, FLAGS.num_layers, FLAGS.learning_rate,
                             FLAGS.learning_rate_decay_factor, input_size, FLAGS.max_gradient_norm)

        train_writer = tf.train.SummaryWriter(
            os.path.join(FLAGS.logs_dir, 'train' + get_title_from_params()), sess.graph)
        # NOTE(review): the returned op is not used in this function; the call
        # is kept because it adds an op to the graph - confirm whether
        # model.step relies on it.
        tf.merge_all_summaries()

        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)

        past_sequence_data, future_sequence_data = gen_data(past_steps, future_steps)

        # Loop invariants.
        checkpoint_path = os.path.join(FLAGS.train_dir, get_title_from_params(),
                                       "TFseq2seqSinusoid.ckpt")
        decrement_timestep = 3  # number of past checkpoints the loss is compared against

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        try:
            while True:
                # Get a batch and make a step.
                start_time = time.time()

                past_sequences, future_sequences, target_weights = model.get_batch(
                    past_sequence_data['train'], future_sequence_data['train'])

                observations = past_sequences  # Apply noise here if desired

                _, step_loss, _ = model.step(sess, observations, future_sequences,
                                             target_weights, train_model)
                # Periodically, run without training for the summary logs.
                if current_step % 20 == 0:
                    _, step_loss, _ = model.step(sess, observations, future_sequences,
                                                 target_weights, False, summary_writer=train_writer)

                # Running averages over one checkpoint interval.
                step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
                loss += step_loss / FLAGS.steps_per_checkpoint
                current_step += 1

                # Once in a while, we save checkpoint, print statistics, and run evals.
                if current_step % FLAGS.steps_per_checkpoint != 0:
                    continue

                # Print statistics for the previous interval; very large
                # losses are reported as infinity.
                reported_loss = loss if loss < 300 else float("inf")
                print("global step %d learning rate %.4f step-time %.2f Batch average MSE loss "
                      "%.4f" % (model.global_step.eval(), model.learning_rate.eval(),
                                step_time, reported_loss))

                # Decrease learning rate if no improvement was seen over last 3 times.
                # 0.95 is a float fudge factor.
                if (len(previous_losses) > decrement_timestep - 1
                        and loss > 0.95 * max(previous_losses[-decrement_timestep:])):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                if reported_loss < 0.02 or model.learning_rate.eval() < 0.01:
                    break
        finally:
            # Flush and release the event file even if training is interrupted.
            train_writer.close()


def predict():
    """Run the model on one test sequence and plot the prediction.

    Builds (or restores) a model with batch size 1 using the test sequence
    lengths from FLAGS, draws one batch from the 'test' split, zeroes the
    decoder inputs after the 'go' symbol and runs a non-training step.
    The observed input, the true continuation and the generated
    continuation are then plotted twice: as a bokeh HTML page
    ("traces.html", opened with show) and as a matplotlib PNG saved to
    FLAGS.plot_dir/<run title>.png.
    """
    tf.reset_default_graph()

    with tf.Session() as sess:
        # Create model and load parameters.
        # Can be varied between training and decoding.
        past_steps = FLAGS.test_observation_steps
        future_steps = FLAGS.test_prediction_steps

        # Set for decoding.
        train_model = False
        feed_forward = False
        input_size = 1

        model = create_model(sess, feed_forward, train_model, past_steps, future_steps, 1,
                             FLAGS.rnn_size, FLAGS.num_layers, FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
                             input_size, FLAGS.max_gradient_norm)
        model.batch_size = 1  # One sequence for testing

        past_sequence_data, future_sequence_data = gen_data(past_steps, future_steps)
        past_sequences, future_sequences, target_weights = model.get_batch(
            past_sequence_data['test'], future_sequence_data['test'])

        observations = past_sequences  # Apply noise here if desired

        # Keep the ground truth before the decoder inputs are overwritten below.
        true_output = np.copy(future_sequences)

        # Force all decoder inputs after the 'go' symbol to zero. They should be ignored, but this is just to be sure
        for i in range(future_steps):
            future_sequences[i+1][0] = 0

        _, output_loss, output_prediction = model.step(sess, observations, future_sequences,
                                                       target_weights, train_model)

        # Re-format graph input: take batch element 0 of every time step.
        input_plot = [step[0] for step in observations]
        output_gen_plt = [np.average(step[0]) for step in output_prediction]
        # Here we discard the GO symbol (first entry of the true output).
        true_output_plot = [step[0] for step in true_output[1:]]

        # Get plot ranges
        y_range = np.linspace(data_utils.data_linspace_tuple[0],
                              data_utils.data_linspace_tuple[1],
                              data_utils.data_linspace_tuple[2])
        input_range = y_range[0:len(input_plot)]
        output_range = y_range[len(input_plot):len(input_plot)+len(output_prediction)]
        plt_title = "TFSeq2Seq" + "rnn_size " + str(FLAGS.rnn_size) + " n_layers " + str(FLAGS.num_layers)

        # Developer toggles for the two output formats.
        plot_bokeh_html = True
        plot_matplotlib_png = True

        if plot_bokeh_html:
            from bokeh.plotting import figure, output_file, show
            output_file("traces.html")
            p1 = figure(title=plt_title, x_axis_label='x', y_axis_label='y',
                        plot_width=800, plot_height=800)  # ~half a 1080p screen
            p1.line(input_range, input_plot, legend="Input.", line_width=2, color='black')
            p1.line(output_range, true_output_plot, legend="True Output.", line_width=2, color='blue')
            p1.line(output_range, output_gen_plt, legend="Generated Output.", line_width=2, color='red')
            show(p1)

        if plot_matplotlib_png:
            if not os.path.exists(FLAGS.plot_dir):
                os.makedirs(FLAGS.plot_dir)
            import matplotlib.pyplot as plt
            fig = plt.figure(figsize=(20, 10))
            plt.plot(input_range, input_plot)
            plt.plot(output_range, true_output_plot)
            plt.plot(output_range, output_gen_plt)
            # Labels must be plain strings, one per plotted line, in plot order.
            legend_str = ['Input', 'True Output', 'Generated Output']
            plt.legend(legend_str, loc='upper left')
            fig_path = os.path.join(FLAGS.plot_dir, get_title_from_params() + '.png')
            plt.savefig(fig_path, bbox_inches='tight')
            # Release the figure; otherwise figures pile up when predict() is
            # called repeatedly (e.g. from run_many).
            plt.close(fig)
            #plt.show()

def run_many():
    """Train and evaluate the model for every combination in a fixed grid.

    For each combination of batch size, learning rate, RNN size, layer count
    and random-data switch, the matching FLAGS are overwritten and train()
    followed by predict() is run.
    """
    batch_sizes = [16, 32, 64]
    learning_rates = [0.5]
    rnn_sizes = [2,4,8,16,32,64]
    layer_counts = [1,2,3]
    random_data_options = [True, False]

    # Nesting order (outermost to innermost) fixes the order the jobs run in.
    for batch_size in batch_sizes:
        for learning_rate in learning_rates:
            for rnn_size in rnn_sizes:
                for num_layers in layer_counts:
                    for use_random_data in random_data_options:
                        FLAGS.rnn_size = rnn_size
                        FLAGS.num_layers = num_layers
                        FLAGS.gen_random_input_data = use_random_data
                        FLAGS.batch_size = batch_size
                        FLAGS.learning_rate = learning_rate
                        train()
                        predict()

def main(_):
    """Dispatch on the command-line flags.

    With --run_many the whole parameter grid is run. Otherwise the model is
    trained first unless --predict is set, and a prediction is always made.
    The positional argument is ignored.
    """
    if FLAGS.run_many:
        run_many()
        return

    if not FLAGS.predict:
        train()
    predict()

# Script entry point. NOTE(review): tf.app.run() is expected to parse the
# command-line flags and then call main() from this module - confirm against
# the TensorFlow version in use.
if __name__ == "__main__":
    tf.app.run()