-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml2pdf.py
More file actions
120 lines (91 loc) · 3.04 KB
/
html2pdf.py
File metadata and controls
120 lines (91 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
"""
HTML to PDF Converter
Converts all HTML files in a directory to PDF using WeasyPrint.
"""
import argparse
import sys
from pathlib import Path
from weasyprint import HTML
def convert_html_to_pdf(html_file, output_dir):
    """
    Convert a single HTML file to PDF.

    The PDF is written into ``output_dir`` and named after the HTML file's
    stem, so ``report.html`` becomes ``report.pdf``. Note that two inputs
    sharing a stem (for example ``a.html`` and ``a.htm``) map to the same
    PDF name.

    Args:
        html_file: Path to the HTML file (``str`` or ``pathlib.Path``)
        output_dir: Directory to save the PDF (``str`` or ``pathlib.Path``)

    Returns:
        True if successful, False otherwise
    """
    # Normalise up front so plain strings are accepted and so the error
    # handler below can always rely on ``.name`` being available.
    html_file = Path(html_file)
    output_dir = Path(output_dir)

    pdf_filename = html_file.stem + '.pdf'
    pdf_path = output_dir / pdf_filename
    print(f"Converting: {html_file.name} -> {pdf_filename}")

    try:
        HTML(filename=str(html_file)).write_pdf(str(pdf_path))
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch run,
        # so any failure is reported and the caller moves on to the next file.
        print(f" ✗ Error converting {html_file.name}: {e}")
        return False

    print(f" ✓ Success: {pdf_path}")
    return True
def _find_html_files(directory):
    """Return the regular files in ``directory`` ending in .html/.htm, sorted."""
    # Compare lower-cased names so ``PAGE.HTML`` is picked up on
    # case-sensitive filesystems too, and require a regular file so that a
    # sub-directory which merely looks like an HTML file is not "converted".
    return sorted(
        entry
        for entry in directory.iterdir()
        if entry.is_file() and entry.name.lower().endswith(('.html', '.htm'))
    )


def convert_directory(input_dir, output_dir):
    """
    Convert all HTML files in a directory to PDF.

    Only the top level of ``input_dir`` is scanned. The output directory is
    created (including parents) if it does not exist, each HTML file is
    converted in sorted order, and a summary of successes and failures is
    printed at the end.

    Args:
        input_dir: Path to directory containing HTML files
        output_dir: Path to directory for PDF output

    Raises:
        SystemExit: With exit status 1 if ``input_dir`` does not exist or is
            not a directory.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    # Fatal problems are diagnostics, so they go to stderr before exiting.
    if not input_path.exists():
        print(f"Error: Input directory '{input_dir}' does not exist.",
              file=sys.stderr)
        sys.exit(1)
    if not input_path.is_dir():
        print(f"Error: '{input_dir}' is not a directory.", file=sys.stderr)
        sys.exit(1)

    # Create output directory if it doesn't exist
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all HTML files
    html_files = _find_html_files(input_path)
    if not html_files:
        print(f"No HTML files found in '{input_dir}'")
        return

    print(f"\nFound {len(html_files)} HTML file(s) in '{input_dir}'")
    print(f"Output directory: '{output_dir}'\n")

    # Convert each file; a failure is counted and does not stop the batch.
    successful = 0
    for html_file in html_files:
        if convert_html_to_pdf(html_file, output_path):
            successful += 1
    failed = len(html_files) - successful

    # Summary
    print(f"\n{'='*50}")
    print("Conversion complete!")
    print(f" Successful: {successful}")
    print(f" Failed: {failed}")
    print(f" Total: {len(html_files)}")
    print(f"{'='*50}\n")
def main():
    """Parse the command-line arguments and convert the requested directory."""
    # Shown verbatim after the option list (RawDescriptionHelpFormatter keeps
    # the line breaks as written).
    usage_examples = '''
Examples:
python html2pdf.py input_folder
python html2pdf.py input_folder -o my_pdfs
python html2pdf.py ./html_files --output ./pdfs
'''

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Convert HTML files to PDF using WeasyPrint',
        epilog=usage_examples,
    )
    cli.add_argument('input_dir', help='Directory containing HTML files to convert')
    cli.add_argument(
        '-o',
        '--output',
        help='Output directory for PDF files (default: output)',
        default='output',
    )

    options = cli.parse_args()
    convert_directory(options.input_dir, options.output)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()