-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHW4.py
More file actions
114 lines (86 loc) · 4.77 KB
/
HW4.py
File metadata and controls
114 lines (86 loc) · 4.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from difflib import SequenceMatcher
import cv2
import numpy as np
from pytesseract import image_to_string
from skimage.util import random_noise
#ground_truth = "This is a lot of 12 point text to test the ocr code and see if it works on all types of file format."
ground_truth = "STARBUCKS Store #10208 11302 Euclid Avenue Cleveland, OH (216) 229-0749 CHK 664290 12/07/2014 06:43 PM 1912003 Drawer: 2 Reg: 2 Vt Pep Mocha 4.95 Sbux Card 4.95 XXXXXXXXXXXX3228 Subtotal $4.95 Total $4.95 Change Due $0.00 Check Closed 12/07/2014 06:43 PM SBUX Card x3228 New Balance: 37.45 Card is registered."
#image_path = 'lab4/testocr-gt.jpg'
image_path = 'lab4/sample21.jpg'
original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
def evaluate_ocr(image):
ocr_text = image_to_string(image, config='--psm 6')
matcher = SequenceMatcher(None, ocr_text, ground_truth)
accuracy = matcher.ratio() * 100 # Accuracy as a percentage
correct_characters = int(matcher.ratio() * len(ground_truth))
incorrect_characters = len(ground_truth) - correct_characters
return correct_characters, incorrect_characters, accuracy, ocr_text
# Apply transformations and evaluate each one
results = {}
# 0. Original results
results['Original Image'] = evaluate_ocr(original_image)
# 1. Add Gaussian Noise
noisy_image_gaussian = (255 * random_noise(original_image, mode='gaussian')).astype(np.uint8)
results['Gaussian Noise'] = evaluate_ocr(noisy_image_gaussian)
# 2. Add Salt and Pepper Noise
noisy_image_sp = (255 * random_noise(original_image, mode='s&p', amount=0.1)).astype(np.uint8)
results['Salt and Pepper Noise'] = evaluate_ocr(noisy_image_sp)
# 3. Rotation (5 and 15 degrees)
def rotate_image(image, angle):
center = (image.shape[1] // 2, image.shape[0] // 2)
matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
return cv2.warpAffine(image, matrix, (image.shape[1], image.shape[0]))
rotated_image = rotate_image(original_image, 5)
results['Rotation (5 degrees)'] = evaluate_ocr(rotated_image)
another_rotated_image = rotate_image(original_image, 15)
results['Rotation (15 degrees)'] = evaluate_ocr(another_rotated_image)
# 4. Horizontal Shear
def shear_image(image, shear_factor):
M = np.array([[1, shear_factor, 0], [0, 1, 0]], dtype=float)
return cv2.warpAffine(image, M, (image.shape[1] + int(image.shape[0] * abs(shear_factor)), image.shape[0]))
sheared_image = shear_image(original_image, 0.2)
results['Horizontal Shear'] = evaluate_ocr(sheared_image)
# 5. Resize (Shrink by 50%)
resized_image_shrink = cv2.resize(original_image, (0, 0), fx=0.5, fy=0.5)
results['Resize (50% Shrink)'] = evaluate_ocr(resized_image_shrink)
# 6. Resize (Stretch Vertically)
resized_aspect_image = cv2.resize(original_image, (original_image.shape[1], int(original_image.shape[0] * 1.5)))
results['Resize (Stretch Vertically)'] = evaluate_ocr(resized_aspect_image)
# 7. Average Blur
blurred_avg = cv2.blur(original_image, (5, 5))
results['Average Blur'] = evaluate_ocr(blurred_avg)
# 8. Gaussian Blur
blurred_gaussian = cv2.GaussianBlur(original_image, (5, 5), sigmaX=1)
results['Gaussian Blur'] = evaluate_ocr(blurred_gaussian)
# 9. Image Sharpening
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
sharpened_image = cv2.filter2D(original_image, -1, kernel)
results['Sharpening'] = evaluate_ocr(sharpened_image)
# 10. Morphological Dilation
dilated_image = cv2.dilate(original_image, np.ones((3, 3), np.uint8), iterations=1)
results['Dilation'] = evaluate_ocr(dilated_image)
# 11. Morphological Erosion
eroded_image = cv2.erode(original_image, np.ones((3, 3), np.uint8), iterations=1)
results['Erosion'] = evaluate_ocr(eroded_image)
# 12. Thresholding
_, thresholded_image = cv2.threshold(original_image, 128, 255, cv2.THRESH_BINARY)
results['Thresholding'] = evaluate_ocr(thresholded_image)
# Sort results by accuracy in descending order
sorted_results = sorted(results.items(), key=lambda x: x[1][2], reverse=True)
# Print the results for each transformation
print("OCR Results for Each Transformation:\n")
for transformation, (correct, incorrect, accuracy, ocr_text) in sorted_results:
print(f"Transformation: {transformation}")
print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct characters: {correct}")
print(f"Incorrect characters: {incorrect}")
print(f"OCR Result:\n{ocr_text}")
print("-" * 40)
# Print the top 5 transformations with the highest accuracy
print("\nTop 5 Transformations with Highest OCR Accuracy:\n")
for transformation, (correct, incorrect, accuracy, _) in sorted_results[:5]:
print(f"{transformation}: {accuracy:.2f}% accuracy")
# Print the last 5 transformations with the lowest accuracy
print("\nBottom 5 Transformations with Lowest OCR Accuracy:\n")
for transformation, (correct, incorrect, accuracy, _) in sorted_results[-5:]:
print(f"{transformation}: {accuracy:.2f}% accuracy")