-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_camelot.py
More file actions
41 lines (36 loc) · 1.34 KB
/
test_camelot.py
File metadata and controls
41 lines (36 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import sys
import camelot.io as camelot
import pandas as pd
# 1. PATH FIX: make the Ghostscript binaries discoverable before camelot is
#    imported in the try-block below.
# NOTE(review): the top-of-file `import camelot.io as camelot` still runs
# before this point and outside the try/except, so a missing camelot install
# raises there instead of printing the hint below -- consider removing it.
gs_bin_path = r'C:\Program Files\gs\gs10.06.0\bin'

# Compare whole PATH entries (normalised for case and trailing separators)
# instead of a substring search, and tolerate PATH being unset.
_current_path = os.environ.get('PATH', '')
_path_entries = {
    os.path.normcase(os.path.normpath(entry))
    for entry in _current_path.split(os.pathsep)
    if entry
}
if os.path.normcase(os.path.normpath(gs_bin_path)) not in _path_entries:
    os.environ['PATH'] = (
        _current_path + os.pathsep + gs_bin_path
        if _current_path
        else gs_bin_path
    )

try:
    import camelot
    # read_pdf is exported from camelot.io
    from camelot.io import read_pdf
except ImportError:
    print("❌ Camelot not found. Run: pip install 'camelot-py[cv]'")
    # Non-zero status so a calling shell/CI step can detect the failure.
    sys.exit(1)
else:
    print("✅ Camelot module loaded correctly.")
def test_extraction(pdf_path, pages='7'):
    """Try to extract tables from a PDF with Camelot and print the outcome.

    This is a manual smoke test: it reports results on stdout and never
    raises, so a failure inside Camelot shows up as a readable message.

    Args:
        pdf_path: Path of the PDF file to read.
        pages: Page selection string forwarded unchanged to ``read_pdf``.
            Defaults to ``'7'``, the page this script has always inspected.

    Returns:
        None. All results are printed.
    """
    print(f"Attempting extraction on: {pdf_path}")
    try:
        # flavor='lattice' targets PDFs with visible grid lines
        tables = read_pdf(pdf_path, pages=pages, flavor='lattice')
        if len(tables) > 0:
            print(f"✅ SUCCESS! Found {len(tables)} table(s).")
            # Show the actual data to prove Ghostscript is working
            print(tables[0].df.head())
        else:
            # Report the pages that were actually read, not a fixed "page 1".
            print(
                "❓ Ghostscript works, but no tables were detected "
                f"on page(s) {pages}."
            )
    except Exception as e:
        # Deliberately broad: this is a diagnostic entry point, so any
        # failure is reported to the user rather than left as a traceback.
        print("❌ EXTRACTION FAILED!")
        print(f"Error: {e}")
if __name__ == "__main__":
    # Script entry point: check that the brochure is present before handing
    # it to the extraction smoke test; the relative path is resolved against
    # the current working directory.
    brochure = 'FSKTM brochure.pdf'
    if not os.path.exists(brochure):
        print(f"❌ File not found: {brochure}. Put it in this folder!")
    else:
        test_extraction(brochure)