-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOCR + Flowchart Detection code
More file actions
193 lines (157 loc) · 6.46 KB
/
OCR + Flowchart Detection code
File metadata and controls
193 lines (157 loc) · 6.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Integrated code of text extraction and shape detection
# -*- coding: utf-8 -*-
"""flowchart_detection_with_yolo_and_easyocr_and_flow.ipynb
Automatically generated by Colab.

Detects flowchart shapes with a trained YOLOv8 model, reads the text inside
each shape with EasyOCR, and orders the text by following arrow connections.
"""
# NOTE: the `!pip ...` lines below are IPython/Colab shell magics — they are
# only valid inside a notebook cell, not in a plain .py script.
# STEP 1: Install required libraries
# Install ultralytics for YOLOv8 model handling
!pip install ultralytics
# Install easyocr for robust text detection
!pip install easyocr
# Install other essential libraries
!pip install opencv-python-headless matplotlib Pillow
# STEP 2: Imports
import os
import cv2
import easyocr
import math
from google.colab import files, drive
import matplotlib.pyplot as plt
from ultralytics import YOLO
import json
# STEP 3: Mount Google Drive and load the trained model
drive.mount('/content/drive')

# STEP 4: Initialize EasyOCR reader (English only; first call downloads weights)
reader = easyocr.Reader(['en'])
print("✅ EasyOCR reader initialized.")

# STEP 5: Upload image and model file
print("\nUpload the flowchart image you want to process:")
uploaded_img = files.upload()
# files.upload() returns {filename: bytes}; take the first uploaded file.
img_path = list(uploaded_img.keys())[0]
img = cv2.imread(img_path)  # BGR ndarray, or None if the read failed
if img is None:
    raise FileNotFoundError("❌ Flowchart image not found.")
else:
    print(f"✅ Image '{img_path}' loaded successfully.")

print("\nUpload the trained model file 'best.pt':")
uploaded_model = files.upload()
# The uploaded file is accessible directly by its filename in the Colab environment.
yolo_model_path = "best.pt"
# Load the trained YOLOv8 model
model = YOLO(yolo_model_path)
print(f"✅ Loaded trained model from: {yolo_model_path}")
# STEP 6: Utility Functions for Flow Analysis
def get_center_point(box):
    """Return the (x, y) midpoint of an (x1, y1, x2, y2) bounding box."""
    left, top, right, bottom = box
    return ((left + right) / 2, (top + bottom) / 2)
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Uses math.hypot, which is the idiomatic form and more numerically
    robust than a manual sqrt-of-squares for extreme coordinates.
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
def find_flow_and_order_text(yolo_results, model_names, reader, img):
    """Order the OCR text of detected flowchart shapes by following the arrows.

    Parameters
    ----------
    yolo_results : iterable
        YOLO prediction results; each element exposes ``.boxes`` where each
        box provides ``box.cls[0]`` (class index) and
        ``box.xyxy[0].int().tolist()`` (pixel coordinates).
    model_names : mapping
        Class-index -> class-name mapping (e.g. ``model.names``).
    reader : EasyOCR-like object
        OCR engine exposing ``readtext(roi, detail=0, paragraph=True)``.
    img : 2-D indexable image (e.g. ndarray)
        Source image; per-shape crops are taken from it for OCR.

    Returns
    -------
    str
        Comma-separated shape texts in flow order, or an explanatory message
        when no 'start_end' node is detected.
    """
    def _center(box):
        # Midpoint of an (x1, y1, x2, y2) box.
        return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)

    def _dist(p, q):
        # Euclidean distance (same formula as the module-level helper).
        return math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

    # For each arrow-name keyword: does point `b` lie in that direction
    # from point `a`?  Points are the top-left corners of the two bboxes.
    # Insertion order (down, right, up, left) mirrors the original elif chain.
    direction_test = {
        'down': lambda a, b: b[1] > a[1],
        'right': lambda a, b: b[0] > a[0],
        'up': lambda a, b: b[1] < a[1],
        'left': lambda a, b: b[0] < a[0],
    }

    shapes = []
    arrows = []
    # Split detections into text-bearing shapes and connector arrows.
    for r in yolo_results:
        for box in r.boxes:
            shape_type = model_names[int(box.cls[0])]
            x1, y1, x2, y2 = box.xyxy[0].int().tolist()
            bbox = (x1, y1, x2, y2)
            # Assuming arrow classes have 'arrow' in their name.
            if 'arrow' in shape_type:
                arrows.append({'type': shape_type, 'bbox': bbox})
                continue
            roi = img[y1:y2, x1:x2]
            try:
                text_result = reader.readtext(roi, detail=0, paragraph=True)
                extracted_text = " ".join(text_result).strip()
            except Exception:
                # Best-effort OCR: a failed crop yields empty text rather
                # than aborting the whole flow analysis.
                extracted_text = ""
            shapes.append({'type': shape_type, 'bbox': bbox, 'text': extracted_text})

    # The traversal must begin at a 'start_end' node.
    start_node = next((s for s in shapes if s['type'] == 'start_end'), None)
    if not start_node:
        return "Start node not found. Cannot determine flow."

    # Adjacency list keyed by shape bbox.  NOTE(review): two shapes with an
    # identical bbox would share one entry — unlikely, but possible.
    graph = {s['bbox']: [] for s in shapes}
    for arrow in arrows:
        arrow_center = _center(arrow['bbox'])
        # The two shapes nearest the arrow are taken as its endpoints.
        nearest = sorted(shapes, key=lambda s: _dist(_center(s['bbox']), arrow_center))
        if len(nearest) < 2:
            continue
        first, second = nearest[0], nearest[1]
        p1, p2 = first['bbox'][:2], second['bbox'][:2]
        # Try first -> second for every direction the arrow name implies,
        # then the reverse orientation — same order as the original branches.
        for keyword, points_toward in direction_test.items():
            if keyword in arrow['type'] and points_toward(p1, p2):
                graph[first['bbox']].append(second)
                break
        else:
            for keyword, points_toward in direction_test.items():
                if keyword in arrow['type'] and points_toward(p2, p1):
                    graph[second['bbox']].append(first)
                    break

    # Walk from the start node, always following the first outgoing edge;
    # `visited` guards against cycles.
    ordered_text_list = []
    current_node = start_node
    visited = set()
    while current_node and current_node['bbox'] not in visited:
        if current_node['text']:
            ordered_text_list.append(current_node['text'])
        visited.add(current_node['bbox'])
        next_nodes = graph.get(current_node['bbox'])
        current_node = next_nodes[0] if next_nodes else None
    return ", ".join(ordered_text_list)
# STEP 7: Perform detection and text extraction
results = model(img)
results_img = img.copy()

# Draw the bounding boxes and labels on the image for visualization
for r in results:
    for box in r.boxes:
        x1, y1, x2, y2 = box.xyxy[0].int().tolist()
        class_id = int(box.cls[0])
        shape_type = model.names[class_id]
        # Draw the bounding box and label on the image
        color = (0, 255, 0)  # green, BGR
        cv2.rectangle(results_img, (x1, y1), (x2, y2), color, 2)
        # Label sits just above the box's top-left corner.
        cv2.putText(results_img, shape_type, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# STEP 8: Get and print the final ordered string
final_string = find_flow_and_order_text(results, model.names, reader, img)
print("\n--- Final Ordered String ---")
print(f"Text extracted from flowchart: {final_string}")
print("----------------------------")

# STEP 9: Save and display results
output_image_path = "final_flowchart_with_yolo_ocr.png"
cv2.imwrite(output_image_path, results_img)
print(f"✅ Final output saved as: {output_image_path}")

# Show inline
plt.figure(figsize=(12, 10))
plt.imshow(cv2.cvtColor(results_img, cv2.COLOR_BGR2RGB))  # matplotlib expects RGB
plt.title("Flowchart Detection with YOLO and EasyOCR")
plt.axis('off')
plt.show()