-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_client_examples.py
More file actions
238 lines (184 loc) · 8 KB
/
api_client_examples.py
File metadata and controls
238 lines (184 loc) · 8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/usr/bin/env python3
"""
Text Extraction API Client Examples
Examples of how to use the Text Extraction API from Python.
"""
import json
from contextlib import ExitStack
from pathlib import Path
from typing import Any, Dict, List, Optional

import requests
# API configuration: default root URL of the Text Extraction API server.
# TextExtractionClient uses this value when no base_url is passed.
API_BASE_URL = "http://localhost:8001"
class TextExtractionClient:
    """Client for the Text Extraction API.

    All calls go through one ``requests.Session``. The client can be used as
    a context manager so the session is closed when the block exits::

        with TextExtractionClient() as client:
            client.health_check()

    Args:
        base_url: Root URL of the API server; a trailing slash is stripped.
        timeout: Passed as ``timeout`` to every request, in seconds.
            ``None`` (the default) sends requests without a timeout.
    """

    def __init__(self, base_url: str = API_BASE_URL, timeout: Optional[float] = None):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()

    def __enter__(self) -> "TextExtractionClient":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def _get_json(self, endpoint: str) -> Dict[str, Any]:
        """GET ``endpoint`` (relative to the base URL) and return the JSON body."""
        response = self.session.get(f"{self.base_url}{endpoint}", timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _post_files(self, endpoint: str, files) -> Dict[str, Any]:
        """POST ``files`` as a multipart upload and return the JSON body."""
        response = self.session.post(
            f"{self.base_url}{endpoint}", files=files, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict[str, Any]:
        """Check API health and configuration.

        Returns:
            The decoded JSON response of ``GET /health``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return self._get_json("/health")

    def get_supported_types(self) -> Dict[str, Any]:
        """Get supported file types.

        Returns:
            The decoded JSON response of ``GET /supported-types``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return self._get_json("/supported-types")

    def extract_single_file(self, file_path: str) -> Dict[str, Any]:
        """Extract text from a single file.

        Args:
            file_path: Path of the file to upload to ``POST /extract``.

        Returns:
            The decoded JSON response.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            requests.HTTPError: If the server answers with an error status.
        """
        # Keep the parameter untouched; work on a separate Path object.
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        # The upload must happen while the file is still open.
        with open(path, 'rb') as handle:
            files = {'file': (path.name, handle, 'application/octet-stream')}
            return self._post_files("/extract", files)

    def extract_multiple_files(self, file_paths: List[str]) -> Dict[str, Any]:
        """Extract text from multiple files.

        Paths that do not exist are skipped with a printed warning.

        Args:
            file_paths: Paths of the files to upload to ``POST /extract-batch``.

        Returns:
            The decoded JSON response.

        Raises:
            ValueError: If none of the given paths exists.
            requests.HTTPError: If the server answers with an error status.
        """
        # ExitStack closes every opened file on exit, including when opening
        # a later file or the request itself raises.
        with ExitStack() as stack:
            files = []
            for file_path in file_paths:
                path = Path(file_path)
                if not path.exists():
                    print(f"Warning: File not found: {path}")
                    continue
                handle = stack.enter_context(open(path, 'rb'))
                files.append(('files', (path.name, handle, 'application/octet-stream')))
            if not files:
                raise ValueError("No valid files to process")
            return self._post_files("/extract-batch", files)
def example_health_check():
    """Example: call the health endpoint and print what it reports."""
    print("=== Health Check Example ===")
    api = TextExtractionClient()
    try:
        report = api.health_check()
        print(f"✅ API Status: {report['status']}")
        print(f" Message: {report['message']}")
        # Which provider API keys the server reports as configured.
        configured = report['api_keys_configured']
        print(f" OpenAI configured: {configured['openai']}")
        print(f" Gemini configured: {configured['gemini']}")
        # One line per processor with the extensions it handles.
        print("\n Supported Extensions:")
        by_processor = report['supported_extensions']
        for name, exts in by_processor.items():
            joined = ', '.join(exts)
            print(f" {name}: {joined}")
    except requests.exceptions.ConnectionError:
        # Reported separately so the user gets an actionable hint.
        print("❌ Cannot connect to API. Make sure the server is running.")
    except Exception as err:
        print(f"❌ Health check failed: {err}")
def example_supported_types():
    """Example: list the file types the API says it supports."""
    print("\n=== Supported Types Example ===")
    api = TextExtractionClient()
    try:
        info = api.get_supported_types()
        print(f"Total supported extensions: {info['total_supported']}")
        every_ext = ', '.join(info['all_extensions'])
        print(f"All extensions: {every_ext}")
        print("\nBy processor:")
        by_processor = info['processors']
        for name, exts in by_processor.items():
            joined = ', '.join(exts)
            print(f" {name}: {joined}")
    except Exception as err:
        print(f"❌ Failed to get supported types: {err}")
def example_single_file():
    """Example: upload one file and print the extraction result."""
    print("\n=== Single File Extraction Example ===")
    api = TextExtractionClient()
    # Sample file expected in the current working directory.
    sample = "test_sample.txt"
    if not Path(sample).exists():
        print(f"❌ Test file not found: {sample}")
        print(" Please run this from the project directory or provide a valid file path.")
        return
    try:
        outcome = api.extract_single_file(sample)
        print(f"✅ File ID: {outcome['file_id']}")
        print(f" Success: {outcome['success']}")
        print(f" Processor: {outcome['processor_used']}")
        print(f" Processing time: {outcome['processing_time']:.2f}s")
        print(f" Text length: {outcome['text_length']} characters")
        info = outcome['file_info']
        print(f" File info: {info['name']} ({info['size_mb']:.2f} MB)")
        if not outcome['success']:
            print(f" Error: {outcome['error']}")
        else:
            print("\n Extracted text preview:")
            text = outcome['extracted_text']
            # Show at most the first 200 characters, marking any truncation.
            if len(text) > 200:
                preview = text[:200] + "..."
            else:
                preview = text
            print(f" {preview}")
    except Exception as err:
        print(f"❌ Single file extraction failed: {err}")
def example_multiple_files():
    """Example: upload several files in one batch and print each result."""
    print("\n=== Multiple Files Extraction Example ===")
    api = TextExtractionClient()
    # Candidate test files (adjust paths as needed).
    candidates = [
        "test_sample.txt",
        # Add more files if available
        # "document.pdf",
        # "video.mp4"
    ]
    # Only send the files that are actually present.
    present = []
    for candidate in candidates:
        if Path(candidate).exists():
            present.append(candidate)
    if not present:
        print("❌ No test files found. Please add some files to test with.")
        return
    try:
        batch = api.extract_multiple_files(present)
        print(f"✅ Batch ID: {batch['batch_id']}")
        print(f" Total files: {batch['total_files']}")
        print(f" Successful: {batch['successful']}")
        print(f" Failed: {batch['failed']}")
        print(f" Total processing time: {batch['total_processing_time']:.2f}s")
        print(f" Total characters extracted: {batch['total_characters']}")
        print("\n Individual results:")
        for position, item in enumerate(batch['results'], start=1):
            marker = "✅" if item['success'] else "❌"
            print(f" {position}. {marker} {item['file_info']['name']}")
            print(f" Processor: {item['processor_used']}")
            print(f" Text length: {item['text_length']} chars")
            if not item['success']:
                print(f" Error: {item['error']}")
    except Exception as err:
        print(f"❌ Multiple files extraction failed: {err}")
def example_curl_commands(base_url: Optional[str] = None):
    """Example: show curl commands equivalent to the client calls.

    Args:
        base_url: Root URL to use in the printed commands. When omitted,
            ``API_BASE_URL`` is used so the commands match the URL the
            client examples talk to. A trailing slash is stripped.
    """
    # Resolve the URL once so every printed command stays consistent.
    root = (API_BASE_URL if base_url is None else base_url).rstrip('/')
    print("\n=== Curl Command Examples ===")
    print("1. Health check:")
    print(f" curl {root}/health")
    print("\n2. Get supported types:")
    print(f" curl {root}/supported-types")
    print("\n3. Extract text from single file:")
    print(f" curl -X POST {root}/extract \\")
    print(" -F 'file=@document.pdf'")
    print("\n4. Extract text from multiple files:")
    print(f" curl -X POST {root}/extract-batch \\")
    print(" -F 'files=@document1.pdf' \\")
    print(" -F 'files=@document2.txt' \\")
    print(" -F 'files=@video.mp4'")
    print("\n5. API documentation:")
    print(f" Open {root}/docs in your browser")
def main():
    """Run all examples in order, then print how to start the server."""
    rule = "=" * 50
    print("Text Extraction API - Client Examples")
    print(rule)
    # Run examples
    example_health_check()
    example_supported_types()
    example_single_file()
    example_multiple_files()
    example_curl_commands()
    print("\n" + rule)
    print("Examples completed!")
    print("\nTo start the API server, run:")
    print(" python api_server.py")
    # Derive the docs URL from the configured base URL instead of repeating it.
    docs_url = f"{API_BASE_URL.rstrip('/')}/docs"
    print(f"\nThen open {docs_url} for interactive API documentation.")
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()