#!/usr/bin/env python3
# Source: ai-content-process/image_transcribe_cli.py (defmethodinc/ai-content-process, main branch)
"""
Image Transcription CLI Tool

This script transcribes images and returns structured JSON with title and description.
Supports various image formats: .jpg, .jpeg, .png, .gif, .bmp, .tiff, .tif, .webp

Example usage:
    python image_transcribe_cli.py path/to/image.jpg
    python image_transcribe_cli.py path/to/image.png --save results.json
"""

import argparse
import json
import sys
from pathlib import Path

from src.text_extractor import TextExtractor

def main() -> None:
    """Run the image transcription command-line interface.

    Parses the command line, validates the image path, calls
    ``TextExtractor().transcribe_image_to_json`` on it, and then either
    prints the resulting JSON to stdout (always indented) or writes it to
    the file given by ``--save`` (indented only when ``--pretty`` is set).
    Progress and error messages are written to stderr; the JSON result is
    written to stdout.

    Exits with status 1 via ``sys.exit`` when the path does not exist, is
    not a regular file, has an unsupported extension, the run is
    interrupted with Ctrl-C, or any exception is raised while transcribing
    or producing the output.
    """
    parser = argparse.ArgumentParser(
        description="Transcribe images and extract text with AI-powered analysis",
        epilog="""
Examples:
  %(prog)s image.jpg
  %(prog)s photo.png --save output.json
  %(prog)s document.jpg --pretty
        """,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        'image_path',
        help='Path to the image file to transcribe'
    )

    parser.add_argument(
        '--save', '-s',
        help='Save JSON output to file instead of printing to console'
    )

    parser.add_argument(
        '--pretty', '-p',
        action='store_true',
        help='Pretty print JSON output (default when printing to console)'
    )

    args = parser.parse_args()

    # Validate image file
    image_path = Path(args.image_path)
    if not image_path.exists():
        print(f"Error: Image file not found: {args.image_path}", file=sys.stderr)
        sys.exit(1)

    if not image_path.is_file():
        print(f"Error: Path is not a file: {args.image_path}", file=sys.stderr)
        sys.exit(1)

    # Check if file has supported extension (comparison is case-insensitive)
    supported_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp'}
    if image_path.suffix.lower() not in supported_extensions:
        print(f"Error: Unsupported image format: {image_path.suffix}", file=sys.stderr)
        # A set has no stable iteration order; sort so the message is the
        # same on every run.
        print(f"Supported formats: {', '.join(sorted(supported_extensions))}", file=sys.stderr)
        sys.exit(1)

    try:
        # Initialize text extractor
        print("Initializing AI models...", file=sys.stderr)
        extractor = TextExtractor()

        # Transcribe image
        print(f"Processing image: {image_path.name}...", file=sys.stderr)
        json_result = extractor.transcribe_image_to_json(image_path)

        if args.save:
            # Build the full text before opening the target: opening with
            # 'w' truncates it, so a JSON parse failure after that point
            # would leave an empty file behind.
            if args.pretty:
                output_text = json.dumps(
                    json.loads(json_result), indent=2, ensure_ascii=False
                )
            else:
                output_text = json_result

            output_path = Path(args.save)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(output_text)
            print(f"Results saved to: {output_path}", file=sys.stderr)
        else:
            # Console output is always pretty printed, with or without
            # --pretty.
            print(json.dumps(json.loads(json_result), indent=2, ensure_ascii=False))

    except KeyboardInterrupt:
        print("\nOperation cancelled by user.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of the CLI: report any failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()