-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPDFtoExcel.py
More file actions
31 lines (24 loc) · 970 Bytes
/
PDFtoExcel.py
File metadata and controls
31 lines (24 loc) · 970 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
# PdfReader opens the PDF and exposes its pages for text extraction
from PyPDF2 import PdfReader
# openpyxl is not referenced directly below; pandas can use it as the engine for writing .xlsx files
import openpyxl
# Hard-coded input PDF and output workbook locations; both are passed to
# pdf_to_excel() at the bottom of the script.
pdf_path = "C:/Users/myuser123/Documents/Math/plan1.pdf"
excel_path = "C:/Users/myuser123/Documents/Math/plan1.xlsx"
def pdf_to_excel(pdf_path, excel_path):
    """Dump the text of a PDF into an Excel sheet, one row per text line.

    Every page of the PDF at ``pdf_path`` is read with ``PdfReader``. Each
    non-blank line of extracted text becomes one spreadsheet row, and each
    whitespace-separated token on that line becomes one cell. The sheet is
    written to ``excel_path`` with neither an index column nor a header row.

    Args:
        pdf_path: Location of the PDF to read.
        excel_path: Location of the Excel file to write.

    Returns:
        None. The result is the file written at ``excel_path``.
    """
    reader = PdfReader(pdf_path)

    rows = []
    for page in reader.pages:
        # A page without a text layer (e.g. a scanned image) may yield an
        # empty or missing result; treat it as contributing no lines.
        text = page.extract_text() or ""
        for line in text.split("\n"):
            # Whitespace-only lines produce no tokens and are skipped.
            tokens = line.split()
            if tokens:
                rows.append(tokens)

    # Lines have differing token counts; DataFrame pads the shorter rows.
    df = pd.DataFrame(rows)
    df.to_excel(excel_path, index=False, header=False)
# Run the conversion on the hard-coded paths defined above.
pdf_to_excel(pdf_path, excel_path)
# Bare expression: shows the output path when this is the last line of a
# notebook cell or REPL session; it has no effect in a plain script.
excel_path