-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_transcribe_examples.py
More file actions
175 lines (138 loc) · 5.86 KB
/
image_transcribe_examples.py
File metadata and controls
175 lines (138 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python3
"""
Image Transcription Examples
This script demonstrates how to use the image transcription functionality
to extract text and generate descriptions from images.
"""
import json
from pathlib import Path
from src.text_extractor import TextExtractor
def example_basic_image_transcription():
    """Transcribe one image and print the fields of the JSON result.

    Creates a ``TextExtractor``, requests the JSON transcription of a
    placeholder image path, decodes it, and prints the file path, title,
    description, extracted text, and the processor name (``'Unknown'``
    when that key is absent). If the placeholder image does not exist, a
    note is printed and the function returns early. Exceptions raised
    while transcribing, decoding, or printing are caught and printed.
    """
    print("=== Basic Image Transcription Example ===")

    # The extractor is created before the file check, so construction
    # problems surface even when no image is available.
    text_extractor = TextExtractor()

    # Placeholder path for the demo.
    image_path = "example_image.jpg"  # Replace with your image path

    image_missing = not Path(image_path).exists()
    if image_missing:
        print(f"Note: Example image '{image_path}' not found.")
        print("To use this example, replace 'example_image.jpg' with a real image path.")
        return

    try:
        # The extractor returns a JSON string; decode it straight away.
        parsed = json.loads(text_extractor.transcribe_image_to_json(image_path))

        print(f"Image: {parsed['file_path']}")
        print(f"Title: {parsed['title']}")
        print(f"Description: {parsed['description']}")
        print(f"Extracted Text: {parsed['extracted_text']}")
        print(f"Processor Used: {parsed.get('processor_used', 'Unknown')}")
    except Exception as err:
        print(f"Error: {err}")
def example_batch_image_processing():
    """Transcribe a fixed list of image paths and print a summary.

    Each path is checked for existence; missing files are reported as
    skipped. Existing files are transcribed via ``TextExtractor`` and the
    decoded result is collected. A failure on one image is printed and
    does not stop the remaining images. When at least one image was
    processed, a per-image summary line (file name and title) is printed.
    """
    print("\n=== Batch Image Processing Example ===")

    text_extractor = TextExtractor()

    # Placeholder paths for the demo (replace with your actual images).
    image_paths = [
        "image1.jpg",
        "image2.png",
        "document_scan.jpg",
        "screenshot.png",
    ]

    processed = []
    for image_path in image_paths:
        # Guard clause: report and move on when the file is absent.
        if not Path(image_path).exists():
            print(f"⚠ Skipped {image_path}: File not found")
            continue

        try:
            raw_json = text_extractor.transcribe_image_to_json(image_path)
            processed.append(json.loads(raw_json))
            print(f"✓ Processed: {Path(image_path).name}")
        except Exception as err:
            print(f"✗ Failed {Path(image_path).name}: {err}")

    # Nothing to summarize when every image was skipped or failed.
    if not processed:
        return

    print(f"\nProcessed {len(processed)} images successfully:")
    for entry in processed:
        print(f"- {Path(entry['file_path']).name}: \"{entry['title']}\"")
def example_image_processor_direct_usage():
    """Example of using the ImageProcessor directly.

    Imports and constructs ``ImageProcessor``, then calls
    ``extract_image_data`` on a placeholder image path and prints the
    title, description, extracted text, and processor name from the
    returned mapping (or its ``'error'`` entry when ``'success'`` is
    falsy). If the placeholder image does not exist, a note is printed
    and the function returns early.

    Failures are printed rather than raised. Import/constructor failures
    are reported as initialization errors; anything that goes wrong
    afterwards is reported with a plain ``Error:`` prefix so the message
    does not blame initialization for an extraction problem.
    """
    print("\n=== Direct ImageProcessor Usage Example ===")

    # The import lives inside the try so that an unavailable processor
    # module is reported here instead of aborting the whole script.
    try:
        from src.file_processors.image_processor import ImageProcessor

        # Initialize the image processor directly
        processor = ImageProcessor()
    except Exception as e:
        print(f"Error initializing ImageProcessor: {e}")
        return

    try:
        image_path = "example_image.jpg"  # Replace with your image path

        if not Path(image_path).exists():
            print(f"Note: Example image '{image_path}' not found.")
            print("To use this example, replace 'example_image.jpg' with a real image path.")
            return

        # Get detailed image data
        result = processor.extract_image_data(image_path)

        if result['success']:
            print("Image Analysis Results:")
            print(f"  Title: {result['title']}")
            print(f"  Description: {result['description']}")
            print(f"  Extracted Text: {result['extracted_text']}")
            print(f"  Processor: {result['processor_used']}")
        else:
            print(f"Error: {result['error']}")
    except Exception as e:
        # Extraction or result-access failure, not an initialization one.
        print(f"Error: {e}")
def example_save_results_to_file():
    """Example of saving image transcription results to files.

    Transcribes a placeholder image with ``TextExtractor`` and writes two
    files relative to the current working directory:

    * ``image_transcription_result.json`` - the result, pretty-printed.
    * ``extracted_text.txt`` - title, description, and (when non-empty)
      the extracted text.

    If the placeholder image does not exist, a note is printed and the
    function returns early. Exceptions raised while transcribing, parsing
    or writing are caught and printed.
    """
    print("\n=== Save Results to File Example ===")

    extractor = TextExtractor()
    image_path = "example_image.jpg"  # Replace with your image path

    if not Path(image_path).exists():
        print(f"Note: Example image '{image_path}' not found.")
        return

    try:
        # Get the JSON result
        json_result = extractor.transcribe_image_to_json(image_path)

        # Parse once, and before any file is opened: mode 'w' truncates on
        # open, so parsing inside the `with` would leave an empty output
        # file behind whenever the JSON is malformed.
        result = json.loads(json_result)

        # Save the pretty-printed JSON
        output_file = "image_transcription_result.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        print(f"Results saved to: {output_file}")

        # Build the text content first so a missing key fails before the
        # text file is created, rather than leaving a partial file.
        sections = [
            f"Title: {result['title']}\n\n",
            f"Description:\n{result['description']}\n\n",
        ]
        if result['extracted_text']:
            sections.append(f"Extracted Text:\n{result['extracted_text']}\n")

        # Also save just the text content
        text_file = "extracted_text.txt"
        with open(text_file, 'w', encoding='utf-8') as f:
            f.write("".join(sections))

        print(f"Text content saved to: {text_file}")

    except Exception as e:
        print(f"Error: {e}")
def main():
    """Print an introduction, run every example in order, then print usage notes."""
    separator = "=" * 40

    print("Image Transcription Examples")
    print(separator)
    print()
    print("This script demonstrates various ways to transcribe images")
    print("and extract structured information including title, description,")
    print("and any readable text content.")
    print()

    # Each example prints its own header and handles its own errors.
    examples = (
        example_basic_image_transcription,
        example_batch_image_processing,
        example_image_processor_direct_usage,
        example_save_results_to_file,
    )
    for run_example in examples:
        run_example()

    print("\n" + separator)
    print("Examples completed!")
    print()
    print("To use with real images:")
    print("1. Replace 'example_image.jpg' with your actual image paths")
    print("2. Ensure you have OPENAI_API_KEY or GOOGLE_API_KEY set in your .env file")
    print("3. Supported formats: .jpg, .jpeg, .png, .gif, .bmp, .tiff, .tif, .webp")
# Script entry point: run the examples only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()