-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompressor.py
More file actions
108 lines (91 loc) · 3.93 KB
/
Copy pathcompressor.py
File metadata and controls
108 lines (91 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import fitz # PyMuPDF
from PIL import Image
import io
import os
import subprocess
import platform
def get_gs_command():
    """Return the name of the Ghostscript console executable for this OS.

    On non-Windows systems the executable is ``gs``. On Windows the 64-bit
    console build (``gswin64c``) is preferred, and the 32-bit build
    (``gswin32c``) is used when it is the only one found on ``PATH``.

    Returns:
        str: The command name to pass as the first element of a
        ``subprocess`` argument list. When no Windows build is found on
        ``PATH``, ``"gswin64c"`` is still returned so the caller's own
        "not found" handling is triggered when it tries to run it.
    """
    if platform.system() != "Windows":
        return "gs"

    # Imported locally so this function does not depend on a file-level import.
    import shutil

    # Probe PATH instead of hard-coding the bitness of the installed build.
    for candidate in ("gswin64c", "gswin32c"):
        if shutil.which(candidate):
            return candidate
    return "gswin64c"
def _reencode_as_jpeg(image_bytes, quality):
    """Re-encode encoded image bytes as an optimized, progressive JPEG."""
    image = Image.open(io.BytesIO(image_bytes))
    # The JPEG writer only accepts a few pixel modes; anything else
    # (palette, alpha, ...) is converted to RGB first.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG", quality=quality, optimize=True, progressive=True)
    return buffer.getvalue()


def compress_pdf(input_path, output_path, quality=75):
    """
    Compress the images of a PDF using PyMuPDF + Pillow and save the result.

    Every embedded image is re-encoded as a JPEG at ``quality``. An image is
    only swapped in when the re-encoded bytes are smaller than the extracted
    ones, and it is replaced in place (``Page.replace_image``) rather than
    drawn on top of the old image, so the old data does not stay in the file.
    Fonts are subset and the document is saved with aggressive clean-up
    options. A summary (or a warning if the size did not shrink) is printed.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path the compressed PDF is written to.
        quality: JPEG quality passed to Pillow (default 75).

    Returns:
        None.

    Raises:
        Exception: Any error raised while processing is printed and re-raised.

    Note:
        ``Page.replace_image`` needs a PyMuPDF version that provides it
        (see the PyMuPDF documentation for the minimum version).
    """
    try:
        doc = fitz.open(input_path)
        try:
            images_found = False
            # replace_image() swaps the image object document-wide, so each
            # xref only needs to be processed once even if several pages use it.
            seen_xrefs = set()
            for page in doc:
                for img in page.get_images(full=True):
                    images_found = True
                    xref = img[0]
                    if xref in seen_xrefs:
                        continue
                    seen_xrefs.add(xref)
                    # img[1] is the xref of a soft mask (0 if none). JPEG has
                    # no alpha channel, so masked images are left untouched
                    # to keep their transparency.
                    if img[1]:
                        continue
                    base_image = doc.extract_image(xref)
                    if not base_image:
                        continue
                    image_bytes = base_image["image"]
                    compressed_bytes = _reencode_as_jpeg(image_bytes, quality)
                    # Heuristic: keep the original when re-encoding does not
                    # make the image smaller.
                    if len(compressed_bytes) >= len(image_bytes):
                        continue
                    page.replace_image(xref, stream=compressed_bytes)
                page.clean_contents()
            # Font subsetting + strong optimization
            doc.subset_fonts()
            doc.save(
                output_path,
                garbage=4,  # aggressive garbage collection
                deflate=True,  # compress streams
                deflate_images=True,  # force image compression
                deflate_fonts=True,  # force font compression
                clean=True,  # clean content streams
            )
        finally:
            # Release the document even when processing or saving fails.
            doc.close()

        original_size = os.path.getsize(input_path)
        new_size = os.path.getsize(output_path)
        if new_size >= original_size:
            msg = "File size increased or stayed the same."
            if images_found:
                msg += " Try much lower quality (30-50) or switch to Ghostscript."
            else:
                msg += " PDF may have few/no compressible images."
            print("Warning:", msg)
        else:
            reduction = (original_size - new_size) / original_size * 100
            print(f"Success: Reduced by {reduction:.1f}%")
        print(f"PyMuPDF: {original_size:,} → {new_size:,} bytes")
    except Exception as e:
        print(f"PyMuPDF compression failed: {e}")
        raise
def compress_with_ghostscript(input_path, output_path, level="ebook"):
    """
    Compress a PDF by re-writing it with Ghostscript's ``pdfwrite`` device.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path the compressed PDF is written to.
        level: Value used for ``-dPDFSETTINGS=/<level>`` (default ``"ebook"``).

    Returns:
        None. A one-line size summary is printed on success.

    Raises:
        FileNotFoundError: The Ghostscript executable could not be started.
        RuntimeError: Ghostscript exited with an error, or the input/output
            file sizes could not be read afterwards.
    """
    cmd = [
        get_gs_command(),
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS=/{level}",
        "-dNOPAUSE", "-dQUIET", "-dBATCH",
        f"-sOutputFile={output_path}",
        input_path,
    ]
    # Only the process launch is guarded here, so a FileNotFoundError really
    # means "executable missing" and not "output file missing".
    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            errors="replace",  # undecodable stderr bytes must not hide the real error
        )
    except FileNotFoundError as e:
        raise FileNotFoundError("Ghostscript ('gs' or 'gswin64c') not found. Install it.") from e
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Ghostscript failed:\n{e.stderr}") from e
    except Exception as e:
        raise RuntimeError(f"Ghostscript error: {e}") from e

    try:
        original_size = os.path.getsize(input_path)
        new_size = os.path.getsize(output_path)
    except OSError as e:
        raise RuntimeError(f"Ghostscript error: {e}") from e

    reduction = (original_size - new_size) / original_size * 100 if original_size > 0 else 0
    print(f"Ghostscript ({level}): {original_size:,} → {new_size:,} bytes "
          f"({reduction:.1f}% smaller)")