-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOCR + Flowchart Detection code
More file actions
193 lines (157 loc) · 6.46 KB
/
OCR + Flowchart Detection code
File metadata and controls
193 lines (157 loc) · 6.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Integrated code of text extraction and shape detection
# -*- coding: utf-8 -*-
"""flowchart_detection_with_yolo_and_easyocr_and_flow.ipynb
Automatically generated by Colab.

Detects flowchart shapes with a trained YOLOv8 model, reads the text inside
each shape with EasyOCR, and orders the text by following arrow connections.
"""
# NOTE: the `!pip ...` lines below are IPython/Colab shell magics — they are
# only valid inside a notebook cell, not in a plain .py script.
# STEP 1: Install required libraries
# Install ultralytics for YOLOv8 model handling
!pip install ultralytics
# Install easyocr for robust text detection
!pip install easyocr
# Install other essential libraries
!pip install opencv-python-headless matplotlib Pillow
# STEP 2: Imports
import os
import cv2
import easyocr
import math
from google.colab import files, drive
import matplotlib.pyplot as plt
from ultralytics import YOLO
import json
# STEP 3: Mount Google Drive and load the trained model
drive.mount('/content/drive')

# STEP 4: Initialize EasyOCR reader (English only; first call downloads weights)
reader = easyocr.Reader(['en'])
print("✅ EasyOCR reader initialized.")

# STEP 5: Upload image and model file
print("\nUpload the flowchart image you want to process:")
uploaded_img = files.upload()
# files.upload() returns {filename: bytes}; take the first uploaded file.
img_path = list(uploaded_img.keys())[0]
img = cv2.imread(img_path)  # BGR ndarray, or None if the read failed
if img is None:
    raise FileNotFoundError("❌ Flowchart image not found.")
else:
    print(f"✅ Image '{img_path}' loaded successfully.")

print("\nUpload the trained model file 'best.pt':")
uploaded_model = files.upload()
# The uploaded file is accessible directly by its filename in the Colab environment.
yolo_model_path = "best.pt"
# Load the trained YOLOv8 model
model = YOLO(yolo_model_path)
print(f"✅ Loaded trained model from: {yolo_model_path}")
# STEP 6: Utility Functions for Flow Analysis
def get_center_point(box):
    """Return the (x, y) midpoint of an (x1, y1, x2, y2) bounding box."""
    left, top, right, bottom = box
    return ((left + right) / 2, (top + bottom) / 2)
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Uses math.hypot, which is the idiomatic form and more numerically
    robust than a manual sqrt-of-squares for extreme coordinates.
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
def find_flow_and_order_text(yolo_results, model_names, reader, img):
    """Order the OCR text of detected flowchart shapes by following the arrows.

    Parameters
    ----------
    yolo_results : iterable
        YOLO prediction results; each element exposes ``.boxes`` where each
        box provides ``box.cls[0]`` (class index) and
        ``box.xyxy[0].int().tolist()`` (pixel coordinates).
    model_names : mapping
        Class-index -> class-name mapping (e.g. ``model.names``).
    reader : EasyOCR-like object
        OCR engine exposing ``readtext(roi, detail=0, paragraph=True)``.
    img : 2-D indexable image (e.g. ndarray)
        Source image; per-shape crops are taken from it for OCR.

    Returns
    -------
    str
        Comma-separated shape texts in flow order, or an explanatory message
        when no 'start_end' node is detected.
    """
    def _center(box):
        # Midpoint of an (x1, y1, x2, y2) box.
        return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)

    def _dist(p, q):
        # Euclidean distance (same formula as the module-level helper).
        return math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

    # For each arrow-name keyword: does point `b` lie in that direction
    # from point `a`?  Points are the top-left corners of the two bboxes.
    # Insertion order (down, right, up, left) mirrors the original elif chain.
    direction_test = {
        'down': lambda a, b: b[1] > a[1],
        'right': lambda a, b: b[0] > a[0],
        'up': lambda a, b: b[1] < a[1],
        'left': lambda a, b: b[0] < a[0],
    }

    shapes = []
    arrows = []
    # Split detections into text-bearing shapes and connector arrows.
    for r in yolo_results:
        for box in r.boxes:
            shape_type = model_names[int(box.cls[0])]
            x1, y1, x2, y2 = box.xyxy[0].int().tolist()
            bbox = (x1, y1, x2, y2)
            # Assuming arrow classes have 'arrow' in their name.
            if 'arrow' in shape_type:
                arrows.append({'type': shape_type, 'bbox': bbox})
                continue
            roi = img[y1:y2, x1:x2]
            try:
                text_result = reader.readtext(roi, detail=0, paragraph=True)
                extracted_text = " ".join(text_result).strip()
            except Exception:
                # Best-effort OCR: a failed crop yields empty text rather
                # than aborting the whole flow analysis.
                extracted_text = ""
            shapes.append({'type': shape_type, 'bbox': bbox, 'text': extracted_text})

    # The traversal must begin at a 'start_end' node.
    start_node = next((s for s in shapes if s['type'] == 'start_end'), None)
    if not start_node:
        return "Start node not found. Cannot determine flow."

    # Adjacency list keyed by shape bbox.  NOTE(review): two shapes with an
    # identical bbox would share one entry — unlikely, but possible.
    graph = {s['bbox']: [] for s in shapes}
    for arrow in arrows:
        arrow_center = _center(arrow['bbox'])
        # The two shapes nearest the arrow are taken as its endpoints.
        nearest = sorted(shapes, key=lambda s: _dist(_center(s['bbox']), arrow_center))
        if len(nearest) < 2:
            continue
        first, second = nearest[0], nearest[1]
        p1, p2 = first['bbox'][:2], second['bbox'][:2]
        # Try first -> second for every direction the arrow name implies,
        # then the reverse orientation — same order as the original branches.
        for keyword, points_toward in direction_test.items():
            if keyword in arrow['type'] and points_toward(p1, p2):
                graph[first['bbox']].append(second)
                break
        else:
            for keyword, points_toward in direction_test.items():
                if keyword in arrow['type'] and points_toward(p2, p1):
                    graph[second['bbox']].append(first)
                    break

    # Walk from the start node, always following the first outgoing edge;
    # `visited` guards against cycles.
    ordered_text_list = []
    current_node = start_node
    visited = set()
    while current_node and current_node['bbox'] not in visited:
        if current_node['text']:
            ordered_text_list.append(current_node['text'])
        visited.add(current_node['bbox'])
        next_nodes = graph.get(current_node['bbox'])
        current_node = next_nodes[0] if next_nodes else None
    return ", ".join(ordered_text_list)
# STEP 7: Perform detection and text extraction
results = model(img)
results_img = img.copy()

# Draw the bounding boxes and labels on the image for visualization
for r in results:
    for box in r.boxes:
        x1, y1, x2, y2 = box.xyxy[0].int().tolist()
        class_id = int(box.cls[0])
        shape_type = model.names[class_id]
        # Draw the bounding box and label on the image
        color = (0, 255, 0)  # green, BGR
        cv2.rectangle(results_img, (x1, y1), (x2, y2), color, 2)
        # Label sits just above the box's top-left corner.
        cv2.putText(results_img, shape_type, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# STEP 8: Get and print the final ordered string
final_string = find_flow_and_order_text(results, model.names, reader, img)
print("\n--- Final Ordered String ---")
print(f"Text extracted from flowchart: {final_string}")
print("----------------------------")

# STEP 9: Save and display results
output_image_path = "final_flowchart_with_yolo_ocr.png"
cv2.imwrite(output_image_path, results_img)
print(f"✅ Final output saved as: {output_image_path}")

# Show inline
plt.figure(figsize=(12, 10))
plt.imshow(cv2.cvtColor(results_img, cv2.COLOR_BGR2RGB))  # matplotlib expects RGB
plt.title("Flowchart Detection with YOLO and EasyOCR")
plt.axis('off')
plt.show()