Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions mlopt/learners/pytorch/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ class Net(nn.Module):
PyTorch internal neural network class.
"""

def __init__(self, n_input, n_classes, n_hidden):
def __init__(self, n_input, n_classes, n_layers, n_hidden):
super(Net, self).__init__()

self.in_layer = nn.Linear(n_input, n_hidden)
self.batchnorm = nn.BatchNorm1d(n_hidden)
self.linear1 = nn.Linear(n_hidden, n_hidden)
self.linear2 = nn.Linear(n_hidden, n_hidden)
self.linear = nn.ModuleList([nn.Linear(n_hidden, n_hidden)])
self.linear.extend([nn.Linear(n_hidden, n_hidden)
for _ in range(n_layers - 1)])
self.out_layer = nn.Linear(n_hidden, n_classes)

# OLD Structure with linear layers
Expand All @@ -26,8 +27,8 @@ def forward(self, x):

x = F.relu(self.in_layer(x))
x = self.batchnorm(x)
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
for layer in self.linear:
x = F.relu(layer(x))
x = self.out_layer(x)

# OLD
Expand Down
59 changes: 40 additions & 19 deletions mlopt/learners/pytorch/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class PyTorchNeuralNet(Learner):
PyTorch Neural Network learner.
"""

def __init__(self, **options):
def __init__(self, onehot=True, **options):
"""
Initialize PyTorch neural network class.

Expand All @@ -30,6 +30,12 @@ def __init__(self, **options):
self.name = PYTORCH
self.n_input = options.pop('n_input')
self.n_classes = options.pop('n_classes')
self.onehot = onehot
if onehot:
self.metrics = {'accuracy': u.accuracy_onehot}
else:
self.metrics = {'mean_squared_error': u.mean_squared_error}


# Default params grid
default_params = NET_TRAINING_PARAMS
Expand Down Expand Up @@ -64,7 +70,10 @@ def __init__(self, **options):
self.n_classes)

# Define loss
self.loss = nn.CrossEntropyLoss()
if onehot:
self.loss = nn.CrossEntropyLoss()
else:
self.loss = nn.MultiLabelSoftMarginLoss()

def train_epoch(self, dataloader):

Expand Down Expand Up @@ -94,6 +103,7 @@ def train_epoch(self, dataloader):
if i % 100 == 0:
metrics.append(u.eval_metrics(outputs,
labels,
self.metrics,
loss))

return u.log_metrics(metrics, string="Train")
Expand All @@ -118,7 +128,7 @@ def evaluate(self, dataloader):
outputs = self.net(inputs)
loss = self.loss(outputs, labels)

metrics.append(u.eval_metrics(outputs, labels, loss))
metrics.append(u.eval_metrics(outputs, labels, self.metrics, loss))

return u.log_metrics(metrics, string="Eval")

Expand All @@ -133,6 +143,7 @@ def train_instance(self,
# Create PyTorch Neural Network and port to to device
self.net = Net(self.n_input,
self.n_classes,
params['n_layers'],
params['n_hidden']).to(self.device)

info_str = "Learning Neural Network with parameters: "
Expand All @@ -155,22 +166,25 @@ def train_instance(self,
# store/load
# best_valid_accuracy = 0.0

for epoch in range(params['n_epochs']): # loop over dataset multiple times
for epoch in range(params['n_epochs']): # loop over dataset

logging.info("Epoch {}/{}".format(epoch + 1, params['n_epochs']))

train_metrics = self.train_epoch(train_dl)
valid_metrics = self.evaluate(valid_dl)

valid_accuracy = valid_metrics['accuracy']

# Change evaluate calling a function set using the onehot flag
if self.onehot:
valid_evaluate = valid_metrics['accuracy']
else:
valid_evaluate = -valid_metrics['loss']

# is_best = valid_accuracy >= best_valid_accuracy
# if is_best:
# logging.info("- Found new best accuracy")
# best_valid_accuracy = valid_accuracy

return valid_accuracy
return valid_evaluate

def train(self, X, y):
"""
Expand All @@ -185,14 +199,15 @@ def train(self, X, y):
"""

self.n_train = len(X)
ytype = torch.long if self.onehot else torch.float

# Convert X dataframe to numpy array
# TODO: Move outside
# X = pandas2array(X)

# # Normalize data

# Shuffle data, split in train and validation and create dataloader here
# Shuffle data, split in train and validation and create dataloader
np.random.seed(0)
idx_pick = np.arange(self.n_train)
np.random.shuffle(idx_pick)
Expand All @@ -206,7 +221,7 @@ def train(self, X, y):
# Create validation data loader
# Training data loader will be created when evaluating the model
# which depends on the batch_size variable
valid_dl = u.get_dataloader(X_valid, y_valid)
valid_dl = u.get_dataloader(X_valid, y_valid, ytype=ytype)

logging.info("Split dataset in %d training and %d validation" %
(len(train_idx), len(valid_idx)))
Expand All @@ -216,10 +231,12 @@ def train(self, X, y):
'learning_rate': learning_rate,
'batch_size': batch_size,
'n_epochs': n_epochs,
'n_layers': n_layers,
'n_hidden': n_hidden}
for learning_rate in self.options['params']['learning_rate']
for batch_size in self.options['params']['batch_size']
for n_epochs in self.options['params']['n_epochs']
for n_layers in self.options['params']['n_layers']
for n_hidden in self.options['params']['n_hidden']
]
n_models = len(params)
Expand All @@ -229,20 +246,21 @@ def train(self, X, y):
"%d inputs, %d outputs" % (self.n_input, self.n_classes))

# Create vector of results
accuracy_vec = np.zeros(n_models)
metrics_vec = np.zeros(n_models)

if n_models > 1:
for i in range(n_models):

# Create dataloader
# Create dataloader
train_dl = u.get_dataloader(X_train, y_train,
batch_size=params[i]['batch_size'])
batch_size=params[i]['batch_size'],
ytype=ytype)

accuracy_vec[i] = self.train_instance(train_dl, valid_dl,
params[i])
metrics_vec[i] = self.train_instance(train_dl, valid_dl,
params[i])

# Pick best parameters
self.best_params = params[np.argmax(accuracy_vec)]
self.best_params = params[np.argmax(metrics_vec)]
logging.info("Best parameters")
logging.info(str(self.best_params))
logging.info("Train neural network with best parameters")
Expand All @@ -254,7 +272,8 @@ def train(self, X, y):
self.best_params = params[0]
train_dl = \
u.get_dataloader(X_train, y_train,
batch_size=self.best_params['batch_size'])
batch_size=self.best_params['batch_size'],
ytype=ytype)

logging.info(self.best_params)
# Retrain network with best parameters over whole dataset
Expand All @@ -271,11 +290,13 @@ def predict(self, X, n_best=None):
# Convert pandas df to array (unroll tuples)
X = torch.tensor(X, dtype=torch.float).to(self.device)

if self.onehot:
# Evaluate classes
# NB. Removed softmax (unscaled probabilities)
# NB. Removed softmax for faster prediction (unscaled probabilities)
y = self.net(X).detach().cpu().numpy()

return self.pick_best_class(y, n_best=n_best)
return self.pick_best_class(y, n_best=n_best)
else:
return torch.sigmoid(self.net(X)).detach().cpu().numpy()

def save(self, file_name):
# Save state dictionary to file
Expand Down
51 changes: 35 additions & 16 deletions mlopt/learners/pytorch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,48 @@
import logging


def accuracy(outputs, labels):
# How often should we compute the metrics
METRICS_STEPS = 100


def accuracy_onehot(outputs, labels):
    """
    Fraction of samples whose predicted class matches the label.

    Args:
        outputs: (np.ndarray) raw model outputs, shape (n_samples, n_classes)
        labels: (np.ndarray) integer class labels, shape (n_samples,)

    Returns: (float) accuracy in [0,1]
    """
    # Predicted class is the index of the largest output per row.
    predictions = np.argmax(outputs, axis=1)
    matches = predictions == labels
    return np.sum(matches) / float(labels.size)


def mean_squared_error(outputs, labels):
    """
    Compute the mean squared error after rounding,
    given the outputs and labels.

    The raw outputs are passed through a sigmoid and rounded to {0, 1}
    before being compared to the (binary) labels, so each per-element
    squared difference is either 0 or 1.

    Args:
        outputs: (np.ndarray) raw model outputs (logits), shape
            (n_samples, n_labels)
        labels: (np.ndarray) binary batch labels, same shape as outputs

    Returns: (float) mean squared error per sample (>= 0)
    """
    n_samples = len(labels)

    # Normalize logits to (0, 1) with a sigmoid, then round to {0, 1}.
    normalized_outputs = np.reciprocal(1 + np.exp(-outputs))
    differences = np.round(normalized_outputs) - labels

    # Sum of squared differences without materializing the O(n^2)
    # differences.dot(differences.T) matrix; this also handles the
    # single-sample case uniformly.
    return np.sum(np.square(differences)) / n_samples


def log_metrics(metrics, string="Train"):
# compute mean of all metrics in summary
metrics_mean = {metric: np.mean([x[metric] for x in metrics])
Expand All @@ -29,22 +57,22 @@ def log_metrics(metrics, string="Train"):
return metrics_mean


def eval_metrics(outputs, labels, loss):
def eval_metrics(outputs, labels, metrics, loss):
outputs = outputs.detach().cpu().numpy()
labels = labels.detach().cpu().numpy()

# compute all metrics on this batch
summary = {metric: METRICS[metric](outputs,
summary = {metric: metrics[metric](outputs,
labels)
for metric in METRICS}
for metric in metrics}
summary['loss'] = loss.item()

return summary


def get_dataloader(X, y, batch_size=1):
def get_dataloader(X, y, batch_size=1, ytype=torch.long):
X = torch.tensor(X, dtype=torch.float)
y = torch.tensor(y, dtype=torch.long)
y = torch.tensor(y, dtype=ytype)

return DataLoader(TensorDataset(X, y),
batch_size=batch_size,
Expand Down Expand Up @@ -76,13 +104,4 @@ def __call__(self):
return self.total/float(self.steps)


# maintain all metrics required in this dictionary- these are used in
# the training and evaluation loops
METRICS = {
'accuracy': accuracy,
# could add more metrics such as accuracy for each token type
}


METRICS_STEPS = 100

2 changes: 1 addition & 1 deletion mlopt/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
'learning_rate': [1e-04, 1e-03, 1e-02],
'n_epochs': [20],
'batch_size': [32],
# 'n_layers': [5, 7, 10]
'n_layers': [2]
}

# Sampling
Expand Down