-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_api_pdf.py
More file actions
89 lines (69 loc) · 2.64 KB
/
test_api_pdf.py
File metadata and controls
89 lines (69 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
"""Test the API server with the PDF URL to verify the fix."""
import requests
import json
import time
def _print_extraction_result(result, preview_chars=300):
    """Print the summary fields, text preview and file info of an API result.

    Args:
        result: Decoded JSON object from a 200 response of ``/extract/url``.
        preview_chars: Maximum number of extracted-text characters to print.
    """
    print("\nAPI Response:")
    print(f"Success: {result.get('success', 'Unknown')}")
    print(f"Processor used: {result.get('processor_used', 'Unknown')}")
    print(f"Processing time: {result.get('processing_time', 'Unknown')} seconds")
    print(f"Text length: {result.get('text_length', 0)} characters")

    if result.get('error'):
        print(f"Error: {result['error']}")

    extracted_text = result.get('extracted_text')
    if result.get('success') and extracted_text:
        print(f"\nExtracted text preview (first {preview_chars} characters):")
        print("-" * 30)
        print(extracted_text[:preview_chars])
        # An ellipsis signals that the preview was truncated.
        if len(extracted_text) > preview_chars:
            print("...")

    # Show file info
    file_info = result.get('file_info')
    if file_info:
        print("\nFile info:")
        for key, value in file_info.items():
            print(f"  {key}: {value}")


def test_api_pdf_extraction():
    """Test PDF extraction via the API server.

    Posts a fixed PDF URL to the ``/extract/url`` endpoint of the server at
    ``http://localhost:8000``, prints the response details, and reports
    whether the extraction succeeded.

    Returns:
        bool: True only when the server answers HTTP 200 and the JSON body
        carries a truthy ``success`` field. False for any other status code,
        a reported extraction failure, a connection error, a timeout, or any
        other exception raised while sending or decoding the request.
    """
    # API endpoint
    base_url = "http://localhost:8000"
    # Test data
    pdf_url = "https://daniel.com.pt/shrek.pdf"
    timeout_seconds = 300  # 5 minute timeout

    print("Testing PDF extraction via API server")
    print("=" * 50)
    print(f"URL: {pdf_url}")

    # Test the URL endpoint
    try:
        print("\nSending request to API...")
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments during a long-running request.
        start_time = time.perf_counter()
        response = requests.post(
            f"{base_url}/extract/url",
            json={"url": pdf_url},
            timeout=timeout_seconds,
        )
        elapsed = time.perf_counter() - start_time

        print(f"Response status: {response.status_code}")
        print(f"Total time: {elapsed:.2f} seconds")

        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            print(f"Response: {response.text}")
            # A non-200 answer is a failed test, not a successful run.
            return False

        result = response.json()
        _print_extraction_result(result)
        # The test passes only if the server itself reports success.
        return bool(result.get('success'))
    except requests.exceptions.ConnectionError:
        print("❌ API server is not running!")
        print("Start it with: python run_api.py")
        return False
    except requests.exceptions.Timeout:
        print("❌ Request timed out (took longer than 5 minutes)")
        return False
    except Exception as e:
        # Top-level boundary of the script: report anything unexpected
        # (e.g. a body that is not valid JSON) as a failed test.
        print(f"❌ Error: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: print a banner, run the check once, and report
    # its boolean outcome.
    print("PDF API Test")
    print("=" * 30)
    outcome_message = (
        "\n✅ Test completed successfully!"
        if test_api_pdf_extraction()
        else "\n❌ Test failed!"
    )
    print(outcome_message)