-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.js
More file actions
225 lines (175 loc) · 7.48 KB
/
script.js
File metadata and controls
225 lines (175 loc) · 7.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
/**
 * Load, format and visualize the car dataset with TensorFlow.js.
 */
/**
 * 1. Load Data, Prepare Feature Set and Visualize
 *
 * FIX: the original code passed the *result* of fetch(...) (a Promise) as the
 * second argument to addEventListener. A Promise is not a callable listener,
 * so the 'DOMContentLoaded' registration was a no-op and the fetch actually
 * started immediately at parse time. Wrapping the work in an arrow function
 * defers it until the DOM is ready, as intended.
 */
document.addEventListener('DOMContentLoaded', () => {
  fetch('https://storage.googleapis.com/tfjs-tutorials/carsData.json')
    .then((response) => {
      if (response.status !== 200) {
        console.log(' Problem fetching the JSON Dataset. Status Code: ' + response.status);
        return;
      }
      return response.json().then((data) => {
        // The car dataset is a JSON array of objects.
        console.log('Here is your dataset data', data);
        // Keep only rows where both features of interest are present.
        const filteredDataset = data.filter((car) =>
          (car.Horsepower != null && car.Miles_per_Gallon != null)
        );
        // Retain only the HP and MPG values in the processed data.
        const cleanedDataset = filteredDataset.map((filteredcar) => ({
          mpg: filteredcar.Miles_per_Gallon,
          hp: filteredcar.Horsepower,
        }));
        console.log('Here is your cleaned dataset ', cleanedDataset);
        // Plot the original input data that we are going to train on.
        // cleanedDataset is an Array of objects, e.g. [{mpg: 18, hp: 130}, ...]
        const values = cleanedDataset.map((d) => ({
          x: d.hp,
          y: d.mpg,
        }));
        console.log(values);
        tfvis.render.scatterplot(
          {name: 'Horsepower v MPG'},
          {values},
          {
            xLabel: 'Horsepower',
            yLabel: 'MPG',
            height: 300
          });
        // A Model is a collection of layers, a loss fn and an optimizer — a neural network, really.
        const model = createModel();
        tfvis.show.modelSummary({name: 'Model Summary'}, model);
        // Convert the data to tensors we can use for training.
        const tensorData = convertToTensor(cleanedDataset);
        const {inputs, labels} = tensorData;
        // Train the model, then evaluate it against the original data.
        // Returned so the outer chain (and its .catch) covers training failures too.
        return trainModel(model, inputs, labels).then(() => {
          console.log('Done Training');
          testModel(model, cleanedDataset, tensorData);
        });
      });
    })
    // FIX: the original promise chain had no rejection handler; network or
    // training errors were silently unhandled.
    .catch((err) => console.error('carsData pipeline failed:', err));
});
/**
 * 2. Create a Model / Neural Network
 *
 * Builds a small feed-forward network: 1 input feature (horsepower),
 * two sigmoid hidden layers of 50 units, and 1 linear output unit (MPG).
 *
 * @returns {tf.Sequential} an uncompiled sequential model.
 */
function createModel() {
  // These layered representations (initialized with random weights) are
  // learned over several training loops until the network approximates the
  // target mapping.
  const model = tf.sequential();
  // A dense layer multiplies its inputs by a weight matrix and adds a bias.
  // inputShape is [1] because each example has a single number as input
  // (the horsepower of a given car).
  // Features - https://stackoverflow.com/questions/30669854/what-is-the-definition-of-feature-in-neural-network#:~:text=Features%20in%20a%20neural%20network,not%20the%20hidden%20layer%20nodes.
  model.add(tf.layers.dense(
      {inputShape: [1], units: 50, useBias: true}));
  // FIX: the original also passed inputShape: [1] to the hidden layers below.
  // In a sequential model only the FIRST layer may declare an input shape;
  // on subsequent layers it is ignored (their real input width is the previous
  // layer's 50 units), so it was misleading and has been removed.
  model.add(tf.layers.dense({units: 50, activation: 'sigmoid'}));
  model.add(tf.layers.dense({units: 50, activation: 'sigmoid'}));
  // Output layer: a single linear unit predicting MPG.
  model.add(tf.layers.dense({units: 1, useBias: true}));
  return model;
}
/**
 * 3. Preprocess the Data
 *
 * Shuffles the examples, converts them to 2-D tensors of shape
 * [numExamples, 1], and min-max normalizes both inputs (HP) and
 * labels (MPG) into the range 0-1.
 *
 * @param {Array<{hp: number, mpg: number}>} data cleaned car records.
 * @returns {{inputs: tf.Tensor2D, labels: tf.Tensor2D,
 *            inputMax: tf.Tensor, inputMin: tf.Tensor,
 *            labelMax: tf.Tensor, labelMin: tf.Tensor}}
 *          normalized tensors plus the bounds needed to un-normalize later.
 */
function convertToTensor(data) {
  // tf.tidy disposes every intermediate tensor created inside the callback.
  return tf.tidy(() => {
    // Step 1. Shuffle in place so each training batch samples the whole
    // distribution — otherwise the model could latch onto the feed order or
    // onto subgroup structure (e.g. only high-horsepower cars early on).
    tf.util.shuffle(data);

    // Step 2. Split the records into parallel arrays and wrap each as a
    // column vector: tf.tensor2d(values, [samples, features]) => [n, 1].
    const hpValues = data.map((record) => record.hp);     // e.g. [130, 140, ...]
    const mpgValues = data.map((record) => record.mpg);   // e.g. [18, 20, ...]
    const inputTensor = tf.tensor2d(hpValues, [hpValues.length, 1]);
    const labelTensor = tf.tensor2d(mpgValues, [mpgValues.length, 1]);

    // Step 3. Min-max scale both tensors to 0-1.
    // Formula: https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
    const inputMax = inputTensor.max();
    const inputMin = inputTensor.min();
    const labelMax = labelTensor.max();
    const labelMin = labelTensor.min();
    const normalizedInputs =
        inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
    const normalizedLabels =
        labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

    return {
      inputs: normalizedInputs,
      labels: normalizedLabels,
      // Bounds are returned so predictions can be un-normalized later.
      inputMax,
      inputMin,
      labelMax,
      labelMin,
    };
  });
}
/**
 * 4. Train the model
 *
 * Compiles the model (Adam optimizer, MSE loss) and fits it to the
 * normalized inputs/labels, streaming per-epoch loss/mse into a
 * tfjs-vis 'Training Performance' panel.
 *
 * @param {tf.Sequential} model the model returned by createModel().
 * @param {tf.Tensor2D} inputs normalized horsepower column vector.
 * @param {tf.Tensor2D} labels normalized MPG column vector.
 * @returns {Promise<tf.History>} resolves when fitting completes.
 */
function trainModel(model, inputs, labels) {
  // Attach optimizer, loss and tracked metrics before fitting.
  model.compile({
    optimizer: tf.train.adam(),
    loss: tf.losses.meanSquaredError,
    metrics: ['mse'],
  });

  const visCallbacks = tfvis.show.fitCallbacks(
      { name: 'Training Performance' },
      ['loss', 'mse'],
      { height: 200, callbacks: ['onEpochEnd'] });

  return model.fit(inputs, labels, {
    batchSize: 32,
    epochs: 100,
    shuffle: true,
    callbacks: visCallbacks,
  });
}
/**
 * 5. Evaluate the trained model.
 *
 * Predicts MPG for 100 evenly spaced normalized horsepower values,
 * un-normalizes both axes using the bounds captured during preprocessing,
 * and plots predictions against the original data points.
 *
 * @param {tf.Sequential} model the trained model.
 * @param {Array<{hp: number, mpg: number}>} inputData original cleaned records.
 * @param {Object} normalizationData min/max tensors from convertToTensor().
 */
function testModel(model, inputData, normalizationData) {
  const {inputMax, inputMin, labelMin, labelMax} = normalizationData;

  // Predict over a uniform [0, 1] grid, then invert the min-max scaling so
  // the plotted values are in real horsepower / MPG units. dataSync() pulls
  // the numbers out before tidy disposes the tensors.
  const [xs, preds] = tf.tidy(() => {
    const gridXs = tf.linspace(0, 1, 100);
    const gridPreds = model.predict(gridXs.reshape([100, 1]));
    const unNormXs = gridXs.mul(inputMax.sub(inputMin)).add(inputMin);
    const unNormPreds = gridPreds.mul(labelMax.sub(labelMin)).add(labelMin);
    return [unNormXs.dataSync(), unNormPreds.dataSync()];
  });

  // Pair each grid x with its prediction for plotting.
  const predictedPoints = Array.from(xs, (val, i) => ({x: val, y: preds[i]}));
  const originalPoints = inputData.map((d) => ({
    x: d.hp, y: d.mpg,
  }));

  tfvis.render.scatterplot(
      {name: 'Model Predictions vs Original Data'},
      {values: [originalPoints, predictedPoints], series: ['original', 'predicted']},
      {
        xLabel: 'Horsepower',
        yLabel: 'MPG',
        height: 300
      }
  );
}