rost5000 · aalekseyantonov · Apr 29, 2021
diff --git a/src/python/application/__init__.py b/src/python/application/__init__.py
@@ -8,6 +8,7 @@
 import src.python.facenet.utils as face_utils
 from src.python.insight_face.mtcnn import MTCNN
 from src.python.application.insight_face import Insight_Face
+from PIL import Image
 
 cap = cv2.VideoCapture(0)
 emb_train = []
@@ -16,7 +17,8 @@
 is_use_windows_message_box = False
 
 if __name__ == "__main__":
-    while True:
+    count = 0
+    while count < 10:
         frame = li.load_from_camera(cap)
         cv2.imshow('frame', frame)
         faces_coord = face_utils.get_face_from_image(frame)
@@ -29,7 +31,7 @@
                 frame[y_face:y_face + h_face, x_face:x_face + w_face]
             ))
             y_train.append(1)
-            break
+            count += 1
 
     if is_use_insight_face:
         insight_face = Insight_Face()
@@ -85,13 +87,22 @@
                     frame[y_face:y_face + h_face, x_face:x_face + w_face]
                 )
                 res = fs.predict_embedded_euclidean_distance(np.array([emb_test]), np.array(emb_train), y_train)
+                cv2.putText(
+                    frame,
+                    "FaceNet",
+                    (x_face + 10, y_face - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX, 1, (34, 139, 34), 2
+                )
                 cv2.rectangle(
                     frame,
                     (face_coord[0], face_coord[1]),
                     (face_coord[0] + face_coord[2], face_coord[1] + face_coord[3]),
-                    (0, 0, 255) if res[0] is None else (255, 0, 0),
+                    (34, 139, 34) if res[0] else (0, 69, 255),
                     6
                 )
+            # show_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            # show_img = Image.fromarray(show_img)
+            # show_img.show()
             cv2.imshow('frame', frame)
 
     cap.release()

diff --git a/src/python/application/insight_face.py b/src/python/application/insight_face.py
@@ -65,7 +65,7 @@ def train_mvp(self):
         embs = []
         embeddings = []
         cap = cv2.VideoCapture(0)
-        while len(embs) == 0:
+        while len(embs) < 10:
             frame = li.load_from_camera(cap)
             emb = get_emb_from_frame(self.conf, self.learner.model, self.mtcnn, frame, tta=False)
             if emb is not None:
@@ -94,7 +94,7 @@ def infer(self, embeddings, names, boxes, faces, frame):
 
             cv2.putText(
                 frame,
-                predictions[ind],
+                "ArcFace+ResNet50",
                 (x_l_face_mtcnn + 10, y_up_face_mtcnn - 10),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, (34, 139, 34), 2
             )

diff --git a/src/python/insight_face/Learner.py b/src/python/insight_face/Learner.py
@@ -2,7 +2,10 @@
 import torch
 from torchvision import transforms as trans
 from pathlib import Path
-
+from tqdm import tqdm
+import numpy as np
+from .verifacation import evaluate
+from .utils import get_time
 
 class face_learner(object):
     def __init__(self, conf):
@@ -22,6 +25,92 @@ def load_state(self, conf, fixed_str, from_save_folder=False, model_only=False):
             self.head.load_state_dict(torch.load(save_path / 'head_{}'.format(fixed_str)))
             self.optimizer.load_state_dict(torch.load(save_path / 'optimizer_{}'.format(fixed_str)))
 
+    def evaluate(self, conf, carray, issame, nrof_folds=5, tta=False):
+        """Embed every item of ``carray`` and score the embeddings.
+
+        The model is switched to eval mode and run without gradients over
+        ``carray`` in slices of ``conf.batch_size``; the outputs are collected
+        in a ``len(carray) x conf.embedding_size`` array and handed to the
+        module-level ``evaluate`` together with ``issame`` and ``nrof_folds``.
+        The model is left in eval mode when this method returns.
+
+        Returns a 7-tuple: mean accuracy, mean best threshold, a ROC-curve
+        placeholder (currently always ``""``), then ``tp``, ``fp``, ``tn`` and
+        ``fn`` exactly as ``evaluate`` reported them.  ``tta`` is accepted but
+        not used by this method.
+        """
+        self.model.eval()
+        total = len(carray)
+        embeddings = np.zeros([total, conf.embedding_size])
+        with torch.no_grad():
+            # Walk the array one batch at a time; the final slice may be shorter.
+            for start in range(0, total, conf.batch_size):
+                stop = min(start + conf.batch_size, total)
+                batch = torch.tensor(carray[start:stop])
+                embeddings[start:stop] = self.model(batch.to(conf.device)).cpu()
+        tpr, fpr, accuracy, best_thresholds, tp, fp, tn, fn = evaluate(embeddings, issame, nrof_folds)
+        # ROC plotting (gen_plot -> Image -> ToTensor) is disabled, so only an
+        # empty placeholder is returned in the ROC slot.
+        roc_curve_tensor = ""
+        return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor, tp, fp, tn, fn
+
+    def board_val(self, db_name, accuracy, best_threshold, roc_curve_tensor):
+        """Log validation results for ``db_name`` to ``self.writer`` at ``self.step``.
+
+        Writes the scalars ``<db_name>_accuracy`` and ``<db_name>_best_threshold``
+        and, when an image is supplied, ``<db_name>_roc_curve``.
+        ``roc_curve_tensor`` may be ``None`` or an empty string (the placeholder
+        produced while ROC plotting is disabled); the image is skipped then.
+        """
+        self.writer.add_scalar('{}_accuracy'.format(db_name), accuracy, self.step)
+        self.writer.add_scalar('{}_best_threshold'.format(db_name), best_threshold, self.step)
+        # Avoid truth-testing the value directly: it may be a multi-element
+        # tensor/array, so only None and the empty string count as "no image".
+        is_placeholder = roc_curve_tensor is None or (
+            isinstance(roc_curve_tensor, str) and not roc_curve_tensor
+        )
+        if not is_placeholder:
+            self.writer.add_image('{}_roc_curve'.format(db_name), roc_curve_tensor, self.step)
+
+    def train(self, conf, epochs):
+        """Train the model and head for ``epochs`` epochs.
+
+        Each batch from ``self.loader`` is moved to ``conf.device``, passed
+        through ``self.model`` and ``self.head``, scored with ``conf.ce_loss``
+        and applied with ``self.optimizer``.  Depending on ``self.step`` the loop
+        also logs the averaged loss (every ``board_loss_every`` steps), evaluates
+        on ``agedb_30`` (every ``evaluate_every`` steps) and writes a checkpoint
+        (every ``save_every`` steps).  After the last epoch one more checkpoint
+        is written through ``save_state(..., to_save_folder=True, extra='final')``.
+
+        The accuracy recorded in checkpoint names is the most recent evaluation
+        result, or ``None`` if no evaluation has run yet.
+        """
+        self.model.train()
+        running_loss = 0.
+        # Latest validation accuracy.  Bound up front so a checkpoint taken
+        # before the first evaluation does not fail on an unbound local.
+        accuracy = None
+        for e in range(epochs):
+            print('epoch {} started'.format(e))
+            # One learning-rate drop per matching entry among the first three
+            # milestones (duplicated entries therefore trigger repeated drops).
+            for milestone in self.milestones[:3]:
+                if e == milestone:
+                    self.schedule_lr()
+            for imgs, labels in tqdm(iter(self.loader)):
+                imgs = imgs.to(conf.device)
+                labels = labels.to(conf.device)
+                self.optimizer.zero_grad()
+                embeddings = self.model(imgs)
+                thetas = self.head(embeddings, labels)
+                loss = conf.ce_loss(thetas, labels)
+                loss.backward()
+                running_loss += loss.item()
+                self.optimizer.step()
+
+                if self.step % self.board_loss_every == 0 and self.step != 0:
+                    loss_board = running_loss / self.board_loss_every
+                    self.writer.add_scalar('train_loss', loss_board, self.step)
+                    running_loss = 0.
+
+                if self.step % self.evaluate_every == 0 and self.step != 0:
+                    # evaluate() also returns tp/fp/tn/fn after these three
+                    # values; training only needs the leading three.
+                    accuracy, best_threshold, roc_curve_tensor = self.evaluate(
+                        conf, self.agedb_30, self.agedb_30_issame)[:3]
+                    self.board_val('agedb_30', accuracy, best_threshold, roc_curve_tensor)
+                    # evaluate() put the model into eval mode; switch back.
+                    self.model.train()
+                if self.step % self.save_every == 0 and self.step != 0:
+                    self.save_state(conf, accuracy)
+
+                self.step += 1
+
+        self.save_state(conf, accuracy, to_save_folder=True, extra='final')
+
+    def save_state(self, conf, accuracy, to_save_folder=False, extra=None, model_only=False):
+        """Write the model (and, unless ``model_only``, head and optimizer) state dicts.
+
+        Files go to ``conf.save_path`` when ``to_save_folder`` is true and to
+        ``conf.model_path`` otherwise.  Each file is named
+        ``<kind>_<time>_accuracy:<accuracy>_step:<step>_<extra>.pth`` where
+        ``<kind>`` is ``model``, ``head`` or ``optimizer`` and ``<time>`` comes
+        from ``get_time()``.
+        """
+        save_path = conf.save_path if to_save_folder else conf.model_path
+        # Build the shared suffix once.  Calling get_time() separately for each
+        # file could stamp them differently, while load_state locates the head
+        # and optimizer files through one common ``fixed_str``.
+        fixed_str = '{}_accuracy:{}_step:{}_{}.pth'.format(get_time(), accuracy, self.step, extra)
+        torch.save(self.model.state_dict(), save_path / 'model_{}'.format(fixed_str))
+        if not model_only:
+            torch.save(self.head.state_dict(), save_path / 'head_{}'.format(fixed_str))
+            torch.save(self.optimizer.state_dict(), save_path / 'optimizer_{}'.format(fixed_str))
+
+    def schedule_lr(self):
+        """Divide the learning rate of every optimizer parameter group by 10.
+
+        The optimizer is printed afterwards so the new settings are visible.
+        """
+        for group in self.optimizer.param_groups:
+            # In-place update of the group's own dict entry.
+            group['lr'] /= 10
+        print(self.optimizer)
+
     def infer(self, conf, faces, target_embs, tta=False):
         '''
         faces : list of PIL Image

diff --git a/src/python/insight_face/config.py b/src/python/insight_face/config.py
@@ -43,8 +43,8 @@ def get_config(training=True):
         conf.ce_loss = CrossEntropyLoss()
     # --------------------Inference Config ------------------------
     else:
-        conf.facebank_path = conf.data_path / 'facebank'
-        conf.threshold = 1.5
+        conf.facebank_path = conf.data_path
+        conf.threshold = 0.8
         conf.face_limit = 10
         # when inference, at maximum detect 10 faces in one image, my laptop is slow
         conf.min_face_size = 30