ai-content-process/image_transcribe_examples.py at main · defmethodinc/ai-content-process · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python3
"""
Image Transcription Examples

This script demonstrates how to use the image transcription functionality
to extract text and generate descriptions from images.
"""

import json
from pathlib import Path
from src.text_extractor import TextExtractor

def example_basic_image_transcription():
    """Transcribe a single image and print the fields of the JSON result.

    The example looks for ``example_image.jpg`` relative to the current
    working directory. If the file is missing, a short note is printed and
    the function returns without creating a ``TextExtractor``.

    Otherwise the image is passed to
    ``TextExtractor.transcribe_image_to_json``; the returned JSON string is
    decoded and its ``file_path``, ``title``, ``description`` and
    ``extracted_text`` entries are printed, followed by ``processor_used``
    (or ``'Unknown'`` when that key is absent).

    Any exception raised while creating the extractor, transcribing the
    image, or reading the result is caught and printed as ``Error: ...`` so
    that the caller can carry on with the next example.

    Returns:
        None. All output goes to stdout.
    """
    print("=== Basic Image Transcription Example ===")

    # Example image path (you would replace this with an actual image)
    image_path = "example_image.jpg"  # Replace with your image path

    # Check for the image before doing any setup: without an input file
    # there is nothing to transcribe, so the extractor is not needed.
    if not Path(image_path).exists():
        print(f"Note: Example image '{image_path}' not found.")
        print("To use this example, replace 'example_image.jpg' with a real image path.")
        return

    try:
        # Created inside the try block so that an initialization failure is
        # reported like any other error instead of aborting the whole script.
        extractor = TextExtractor()

        # Transcribe the image and get JSON result
        json_result = extractor.transcribe_image_to_json(image_path)

        # Parse and display the result
        result = json.loads(json_result)

        print(f"Image: {result['file_path']}")
        print(f"Title: {result['title']}")
        print(f"Description: {result['description']}")
        print(f"Extracted Text: {result['extracted_text']}")
        print(f"Processor Used: {result.get('processor_used', 'Unknown')}")

    except Exception as e:
        print(f"Error: {e}")

def example_batch_image_processing():
    """Transcribe a fixed list of example images and print a summary.

    Each path in the built-in list is handled independently, in order:

    * a path that does not exist is reported as skipped;
    * an existing image is passed to
      ``TextExtractor.transcribe_image_to_json`` and the decoded JSON result
      is collected;
    * any exception raised while creating the extractor or processing one
      image is reported as a failure for that image and the loop continues.

    The ``TextExtractor`` is created lazily, the first time an existing image
    is found, so a run with no images present needs no extractor at all.

    After the loop, if at least one image succeeded, the file name and
    ``title`` of every collected result are printed.

    Returns:
        None. All output goes to stdout.
    """
    print("\n=== Batch Image Processing Example ===")

    # Example image paths (replace with your actual images)
    image_paths = [
        "image1.jpg",
        "image2.png",
        "document_scan.jpg",
        "screenshot.png",
    ]

    results = []
    extractor = None  # built on first use, inside the per-image try block

    for image_path in image_paths:
        if not Path(image_path).exists():
            print(f"⚠ Skipped {image_path}: File not found")
            continue

        try:
            if extractor is None:
                # An initialization failure is reported as a failure of the
                # current image rather than crashing the whole batch.
                extractor = TextExtractor()
            json_result = extractor.transcribe_image_to_json(image_path)
            result = json.loads(json_result)
            results.append(result)
            print(f"✓ Processed: {Path(image_path).name}")
        except Exception as e:
            print(f"✗ Failed {Path(image_path).name}: {e}")

    # Display summary
    if results:
        print(f"\nProcessed {len(results)} images successfully:")
        for result in results:
            print(f"- {Path(result['file_path']).name}: \"{result['title']}\"")

def example_image_processor_direct_usage():
    """Call ``ImageProcessor`` directly, bypassing ``TextExtractor``.

    ``ImageProcessor`` is imported and instantiated inside the ``try`` block,
    so an import or initialization failure is printed instead of raised.
    With a processor in hand, the example checks for ``example_image.jpg``:
    if the file is missing a note is printed; otherwise
    ``extract_image_data`` is called and, depending on the result's
    ``success`` entry, either the title, description, extracted text and
    processor name or the ``error`` entry is printed.

    Returns:
        None. All output goes to stdout.
    """
    print("\n=== Direct ImageProcessor Usage Example ===")

    try:
        from src.file_processors.image_processor import ImageProcessor

        # Build the processor without going through TextExtractor
        image_processor = ImageProcessor()

        sample_image = "example_image.jpg"  # Replace with your image path

        if Path(sample_image).exists():
            # Ask the processor for the detailed analysis of the image
            analysis = image_processor.extract_image_data(sample_image)

            if not analysis['success']:
                print(f"Error: {analysis['error']}")
            else:
                print("Image Analysis Results:")
                print(f"  Title: {analysis['title']}")
                print(f"  Description: {analysis['description']}")
                print(f"  Extracted Text: {analysis['extracted_text']}")
                print(f"  Processor: {analysis['processor_used']}")
        else:
            print(f"Note: Example image '{sample_image}' not found.")
            print("To use this example, replace 'example_image.jpg' with a real image path.")

    except Exception as exc:
        print(f"Error initializing ImageProcessor: {exc}")

def example_save_results_to_file():
    """Transcribe the example image and save the result to two files.

    The example looks for ``example_image.jpg`` relative to the current
    working directory. If the file is missing, a note is printed and the
    function returns without creating a ``TextExtractor`` or any output file.

    Otherwise the JSON string returned by
    ``TextExtractor.transcribe_image_to_json`` is decoded once and written to:

    * ``image_transcription_result.json`` - the full result, pretty-printed
      with two-space indentation and non-ASCII characters kept as-is;
    * ``extracted_text.txt`` - the ``title`` and ``description`` entries,
      plus the ``extracted_text`` entry when it is non-empty.

    Both files are written to the current working directory as UTF-8. Any
    exception is caught and printed as ``Error: ...``.

    Returns:
        None. Progress messages go to stdout.
    """
    print("\n=== Save Results to File Example ===")

    image_path = "example_image.jpg"  # Replace with your image path

    # Check for the image before doing any setup; nothing to save without it.
    if not Path(image_path).exists():
        print(f"Note: Example image '{image_path}' not found.")
        return

    try:
        # Created inside the try block so that an initialization failure is
        # reported like any other error instead of aborting the whole script.
        extractor = TextExtractor()

        # Get the JSON result
        json_result = extractor.transcribe_image_to_json(image_path)

        # Decode once, before any file is opened for writing: a malformed
        # response must not leave an empty/truncated output file behind.
        result = json.loads(json_result)

        # Save the pretty-printed JSON
        output_file = "image_transcription_result.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        print(f"Results saved to: {output_file}")

        # Also save just the text content. The content is assembled before
        # the file is opened so a missing key cannot leave a partial file.
        sections = [
            f"Title: {result['title']}\n\n",
            f"Description:\n{result['description']}\n\n",
        ]
        if result['extracted_text']:
            sections.append(f"Extracted Text:\n{result['extracted_text']}\n")

        text_file = "extracted_text.txt"
        with open(text_file, 'w', encoding='utf-8') as f:
            f.write("".join(sections))

        print(f"Text content saved to: {text_file}")

    except Exception as e:
        print(f"Error: {e}")

def main():
    """Print an introduction, run every example in order, then print usage tips.

    The four example functions are called one after another with no
    arguments; everything is written to stdout.
    """
    separator = "=" * 40

    intro_lines = (
        "Image Transcription Examples",
        separator,
        "",
        "This script demonstrates various ways to transcribe images",
        "and extract structured information including title, description,",
        "and any readable text content.",
        "",
    )
    for line in intro_lines:
        print(line)

    # Run examples, in the same order they are defined in this file
    examples = (
        example_basic_image_transcription,
        example_batch_image_processing,
        example_image_processor_direct_usage,
        example_save_results_to_file,
    )
    for run_example in examples:
        run_example()

    closing_lines = (
        "\n" + separator,
        "Examples completed!",
        "",
        "To use with real images:",
        "1. Replace 'example_image.jpg' with your actual image paths",
        "2. Ensure you have OPENAI_API_KEY or GOOGLE_API_KEY set in your .env file",
        "3. Supported formats: .jpg, .jpeg, .png, .gif, .bmp, .tiff, .tif, .webp",
    )
    for line in closing_lines:
        print(line)

# Run the examples only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()