emsch · genryxy · May 28, 2020 · May 28, 2020 · May 29, 2020 · May 30, 2020
diff --git a/python/bin/run_answers_consumer b/python/bin/run_answers_consumer
@@ -10,7 +10,6 @@ import pymongo
 import pymongo.errors
 import femida_detect
 
-
 parser = argparse.ArgumentParser()
 ANSWERS = 'answers'
 PDFS = 'pdfs'
@@ -104,14 +103,14 @@ def main(args):
             _, class_, type_, variant = pdf_comment.split('_')
 
             if type_ == '\u041e\u0422':
-                type_ = 'ot'  
+                type_ = 'ot'
             else:
                 type_ = 'mat'
 
             result = dict(
                 page=task['i'],
                 personal=[{'class': class_,
-                          'type': type_}],
+                           'type': type_}],
                 UUID=task['UUID'],
                 requested_manual=[],
                 manual_checks=[],
@@ -131,6 +130,7 @@ def main(args):
             result['img_fio'] = img_fio
             logger.debug(f"Task _id={task['_id']} :: {task['i']} :: predict(cropped)")
             predictions = predict(cropped)
+            cropped.set_predictions(predictions)
             logger.debug(f"Task _id={task['_id']} :: {task['i']} :: cropped.plot_predicted(predictions)")
             inpainted = cropped.plot_predicted(predictions, only_answers=True)
             img_test_form = os.path.join(args.save_path, f"{task['UUID']}__{task['i']}_test_form.jpg")
@@ -140,12 +140,23 @@ def main(args):
                 cv2.resize(inpainted, (1000, 1000 * h // w))
             )
             result['img_test_form'] = img_test_form
-            for (j, letter), pred in zip(
-                    cropped.get_labels(),
-                    predictions
-            ):
-                if pred:
-                    test_results[str(j)] += letter
+            if (([cropped.tag_answer] in predictions) or
+                    ([cropped.tag_empty] in predictions)):
+                for (j, letter), pred in zip(
+                        cropped.get_labels(),
+                        predictions
+                ):
+                    if (pred == cropped.tag_answer and
+                            j <= cropped.number_questions):
+                        test_results[str(j)] += letter
+                test_updates[0]['updates'] = cropped.get_dict_with_corrections()
+            else:
+                for (j, letter), pred in zip(
+                        cropped.get_labels(),
+                        predictions
+                ):
+                    if pred:
+                        test_results[str(j)] += letter
             logger.info(f"Task _id={task['_id']} :: {task['i']} :: status :: normal")
             result.update(status='normal')
             task['result'] = result

diff --git a/python/femida_detect/imgparse.py b/python/femida_detect/imgparse.py
@@ -39,9 +39,42 @@
         list(map(int, os.environ["FEMIDA_OCR_BORDER_LEFT"].split(",")))
     )
 
+# Left x-edges of the 14 box columns in the updates (corrections) area:
+# two halves, each with two digit boxes followed by five answer boxes.
+# Set FEMIDA_OCR_UPDATES_BORDER_LEFT (comma-separated integers) to
+# override the defaults below.
+if "FEMIDA_OCR_UPDATES_BORDER_LEFT" in os.environ:
+    BORDER_UPDATES_LEFT = np.array(
+        list(map(int, os.environ["FEMIDA_OCR_UPDATES_BORDER_LEFT"].split(",")))
+    )
+else:
+    BORDER_UPDATES_LEFT = np.array(
+        [
+            # left half, digit boxes ("FIRST", "SECOND")
+            485,
+            593,
+            # left half, answer boxes ("A".."E")
+            715, 838, 960, 1080, 1200,
+            # right half, digit boxes ("FIRST", "SECOND")
+            1560,
+            1663,
+            # right half, answer boxes ("A".."E")
+            1783, 1906, 2030, 2145, 2270,
+        ]
+    )
+
+# Left x-edges of the variant boxes (FEMIDA_OCR_VARIANT_BORDER_LEFT overrides).
+BORDER_VARIANT_LEFT = np.array(
+    list(map(int, os.environ["FEMIDA_OCR_VARIANT_BORDER_LEFT"].split(",")))
+    if "FEMIDA_OCR_VARIANT_BORDER_LEFT" in os.environ
+    else [450, 565, 687, 810, 937]
+)
+
 MARGIN_HORIZONTAL = int(os.environ.get("FEMIDA_OCR_MARGIN_HORIZONTAL", 84))
 
 BORDER_RIGHT = BORDER_LEFT + MARGIN_HORIZONTAL
+BORDER_UPDATES_RIGHT = BORDER_UPDATES_LEFT + MARGIN_HORIZONTAL  # right x-edges
+BORDER_VARIANT_RIGHT = BORDER_VARIANT_LEFT + MARGIN_HORIZONTAL  # right x-edges
 
 if "FEMIDA_OCR_BORDER_TOP" not in os.environ:
     BORDER_TOP = np.array([1322, 1450, 1580, 1715, 1843, 2087, 2217, 2349, 2479, 2610])
@@ -50,9 +83,25 @@
         list(map(int, os.environ["FEMIDA_OCR_BORDER_TOP"].split(",")))
     )
 
+# Top y-edges of the box rows in the updates area (FEMIDA_OCR_UPDATES_BORDER_TOP).
+BORDER_UPDATES_TOP = np.array(
+    list(map(int, os.environ["FEMIDA_OCR_UPDATES_BORDER_TOP"].split(",")))
+    if "FEMIDA_OCR_UPDATES_BORDER_TOP" in os.environ
+    else [3235, 3359, 3479, 3600, 3719, 3841]
+)
+
+# Top y-edge of the variant box row (FEMIDA_OCR_VARIANT_BORDER_TOP overrides).
+BORDER_VARIANT_TOP = np.array(
+    list(map(int, os.environ["FEMIDA_OCR_VARIANT_BORDER_TOP"].split(",")))
+    if "FEMIDA_OCR_VARIANT_BORDER_TOP" in os.environ
+    else [958]
+)
+
 MARGIN_VERTICAL = int(os.environ.get("FEMIDA_OCR_MARGIN_VERTICAL", 83))
 
 BORDER_BOTTOM = BORDER_TOP + MARGIN_VERTICAL
+BORDER_UPDATES_BOTTOM = BORDER_UPDATES_TOP + MARGIN_VERTICAL  # bottom y-edges
+BORDER_VARIANT_BOTTOM = BORDER_VARIANT_TOP + MARGIN_VERTICAL  # bottom y-edges
 
 TOP_BLACK_LINE_POSITIONS = (1225, 1275)
 TOP_BLACK_LINE_LEFT_RIGHT = 400
@@ -77,10 +126,15 @@ def need_flip(image):
 
 
 LABELS = ("A", "B", "C", "D", "E")
+LABELS_UPDATES = ("FIRST", "SECOND", "A", "B", "C", "D", "E")  # digits, then answers
+VARIANT_DIGITS = tuple(range(53, 59))  # ids 53..58; NOTE(review): 6 ids, 5 default columns
 QUESTIONS = tuple(range(1, 41))
+UPDATES = tuple(range(41, 53))  # ids 41..52 used for the rows of the updates area
 WIDTH = 3000
 HEIGHT = int(WIDTH * 578 / 403)
 Box = collections.namedtuple("Box", "center,delta,angle")
+TAG_ANSWER = 11  # non-digit prediction value: box counted as a marked answer
+TAG_EMPTY = 10  # non-digit prediction value; presumably an empty box - confirm
 
 
 def box_to_slice(box):
@@ -106,6 +160,31 @@ def _get_small_rectangles_positions_middle():
         label = labels[i % len(LABELS)]
         box = (question, label), Box((xc[i, j], yc[i, j]), (dx[i, j], dy[i, j]), 0.0)
         result.append(box)
+
+    xl, yt = np.meshgrid(BORDER_UPDATES_LEFT, BORDER_UPDATES_TOP)
+    xr, yb = np.meshgrid(BORDER_UPDATES_RIGHT, BORDER_UPDATES_BOTTOM)
+    xc, yc = (xl + xr) / 2, (yt + yb) / 2
+    dx, dy = (xl - xr), (yb - yt)
+    labels = LABELS_UPDATES
+    # horizontal checking for updates (first number, second number, 5 answer choices)
+    for i, j in itertools.product(range(xc.shape[0]), range(xc.shape[1])):
+        # add (xc.shape[0]) if works with the right side of updates
+        mistake = UPDATES[i + (j >= xc.shape[1] // 2) * (xc.shape[0])]
+        label = labels[j % len(LABELS_UPDATES)]
+        box = (mistake, label), Box((xc[i, j], yc[i, j]), (dx[i, j], dy[i, j]), 0.0)
+        result.append(box)
+
+    xl, yt = np.meshgrid(BORDER_VARIANT_LEFT, BORDER_VARIANT_TOP)
+    xr, yb = np.meshgrid(BORDER_VARIANT_RIGHT, BORDER_VARIANT_BOTTOM)
+    xc, yc = (xl + xr) / 2, (yt + yb) / 2
+    dx, dy = (xl - xr), (yb - yt)
+    # horizontal checking for variant
+    for i, j in itertools.product(range(xc.shape[0]), range(xc.shape[1])):
+        # only one row
+        variant = VARIANT_DIGITS[j]
+        label = 0
+        box = (variant, label), Box((xc[i, j], yc[i, j]), (dx[i, j], dy[i, j]), 0.0)
+        result.append(box)
     return tuple(result)
 
 
@@ -294,10 +373,82 @@ def plot_predicted(self, labels, only_answers=False):
             recognized = recognized[self.ANSWERS_BOX]
         return recognized
 
+    def set_predictions(self, predictions) -> None:
+        self.predictions = predictions  # read later by get_all_numbers_tasks()
+
+    @staticmethod
+    def is_digit(prediction: int) -> bool:  # True unless it is a marker tag
+        return prediction not in (TAG_ANSWER, TAG_EMPTY)
+
+    def get_number_task(self, decades, units) -> int:
+        """Combine two digit-box predictions into a task number (-1 if none).
+
+        ``decades`` / ``units`` are single predictions indexed with ``[0]``.
+        A lone digit in the first box is read as a one-digit number.
+        """
+        tens_ok, ones_ok = self.is_digit(decades[0]), self.is_digit(units[0])
+        if ones_ok:
+            return units[0] + (decades[0] * 10 if tens_ok else 0)
+        if tens_ok:
+            return decades[0]
+        return -1
+
+    def get_all_numbers_tasks(self) -> list:
+        """Read the task number written in every row of the updates area.
+
+        Update rows follow the regular answers in ``self.predictions`` and
+        alternate left column / right column, so even rows get keys counted
+        from ``len(QUESTIONS) + 1`` and odd rows continue after the left half.
+
+        Returns ``(update_key, task_number)`` tuples sorted by key;
+        ``task_number`` is ``-1`` when no digit was recognised.
+        """
+        first_update = len(QUESTIONS) * len(LABELS)
+        row_width = len(LABELS_UPDATES)
+        starts = range(first_update, first_update + row_width * len(UPDATES),
+                       row_width)
+        first_key = len(QUESTIONS) + 1
+        keys = []
+        for row, start in enumerate(starts):
+            number = self.get_number_task(
+                self.predictions[start], self.predictions[start + 1])
+            column_shift = (len(starts) // 2) * (row % 2)
+            keys.append((first_key + column_shift + row // 2, number))
+        return sorted(keys, key=lambda pair: pair[0])
+
+    def get_dict_with_corrections(self):
+        """Map each corrected task number (as str) to its marked letters.
+
+        Digit boxes ("FIRST"/"SECOND") never contribute letters; rows with
+        no readable number are collected under '-1' and dropped at the end.
+        """
+        keys = self.get_all_numbers_tasks()
+        test_updates = {str(number): '' for _, number in keys}
+        first, last = len(QUESTIONS) + 1, len(QUESTIONS) + len(UPDATES)
+        for (j, letter), pred in zip(self.get_labels(), self.predictions):
+            if first <= j <= last and letter in LABELS and pred == TAG_ANSWER:
+                test_updates[str(keys[j - first][1])] += letter
+        # '-1' marks rows without a readable number (unused update rows)
+        test_updates.pop('-1', None)
+        return test_updates
+
     PERSONAL_BOX = (slice(20, 1100), slice(45, 3000))
     ANSWERS_BOX = (slice(1150, -200), slice(45, 3000))
     MATH_CHECKBOX = (slice(780, 870), slice(490, 570))
     OT_CHECKBOX = (slice(780, 870), slice(1220, 1300))
+    predictions = []  # class-level default; set_predictions() rebinds it per instance
+
+    @property
+    def number_questions(self) -> int:
+        return len(QUESTIONS)  # count of regular question ids
+
+    @property
+    def tag_answer(self) -> int:
+        return TAG_ANSWER  # exposes the module constant through the instance
+
+    @property
+    def tag_empty(self) -> int:
+        return TAG_EMPTY  # exposes the module constant through the instance
 
     @property
     def personal(self):