-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcompress.py
More file actions
149 lines (128 loc) · 4.4 KB
/
compress.py
File metadata and controls
149 lines (128 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import zlib
from dataclasses import dataclass
import deflate
import zopfli.zlib
from reencode import Huffman, lz77, reencode
@dataclass(frozen=True)
class CompressionInfo:
    """Immutable record of the settings behind one wrapped candidate.

    Attributes:
        method: Label of the compressor run, e.g. ``"zlib(level=9)"``.
        window: Window code stored with the candidate; it selects the
            window argument written into the generated decompress call.
        delimiter: Quote character that delimits the embedded literal.
        reencode: Whether the deflate stream went through ``reencode``
            before being escaped.
    """

    method: str
    window: int
    delimiter: str
    reencode: bool
# Search space explored by compress(): every combination below yields candidates.

# `numiterations` values passed to zopfli.zlib.compress.
ZOPFLI_ITERS: list[int] = [15, 128]
# `compresslevel` values passed to deflate.deflate_compress.
LIBDEFLATE_LEVELS: list[int] = [11, 12]
# `level` values passed to zlib.compress.
ZLIB_LEVELS: list[int] = [9]
# Quote bytes tried as the delimiter of the embedded string literal.
DELIMS: list[bytes] = [b"'", b'"']
# Window codes tried with zlib. -10 is a special code: compress() passes
# wbits=-15 for it and the generated wrapper spells it as `~9`.
WINDOWS: list[int] = [-9, -10]
def _hoist_import(src: bytes) -> tuple[bytes, bytes]:
if src.startswith(b"import"):
module = src.split()[1]
return src[len(module) + 8 :], b"," + module
return src, b""
def _sanitize(b_in: bytes, delim: bytes) -> bytes:
b_out = bytearray()
for b, b_next in zip(b_in, [*b_in[1:], 0]):
if b == 0:
b_out += b"\\x00" if b_next in b"01234567" else b"\\0"
elif b == ord("\r"):
b_out += b"\\r"
elif b == ord("\\") and b_next in b"\0\n\r\"'01234567NU\\abfnrtuvx":
b_out += b"\\\\"
elif b == ord("\n") and len(delim) == 1:
b_out += b"\\n"
elif bytes([b]) == delim:
b_out += b"\\" + delim
else:
b_out.append(b)
return bytes(b_out)
def _wrap(deflate_data: bytes, delim: bytes, hoisted: bytes, window: int) -> bytes:
    """Build the self-extracting program around a deflate stream.

    The stream is passed through ``reencode`` and ``_sanitize`` and then
    embedded as a *delim*-quoted literal inside::

        #coding:L1
        import zlib<hoisted>
        exec(zlib.decompress(bytes(<literal>,"L1")<window argument>))

    Args:
        deflate_data: Deflate stream to embed.
        delim: Quote bytes used around the literal.
        hoisted: Text appended to ``import zlib`` (``b""`` or ``b",mod"``).
        window: Window code. ``-10`` is written as ``,~9``, ``15`` emits no
            argument at all, any other value is written as ``,<window>``.

    Returns:
        The complete wrapper program as bytes.
    """
    payload = _sanitize(reencode(deflate_data, delim), delim)

    if window == -10:
        window_arg = b",~9"
    elif window == 15:
        window_arg = b""
    else:
        window_arg = b",%d" % window

    return b"".join(
        (
            b"#coding:L1\nimport zlib",
            hoisted,
            b"\nexec(zlib.decompress(bytes(",
            delim,
            payload,
            delim,
            b',"L1")',
            window_arg,
            b"))",
        )
    )
def compress(src: bytes) -> tuple[bytes, CompressionInfo]:
    """Try every configured compressor setting and keep the shortest wrapper.

    A leading import is hoisted first. The remaining source is then
    compressed with zopfli, libdeflate and zlib using the module-level
    parameter lists. Each stream is rendered with every delimiter in
    ``DELIMS``, both with and without ``reencode``.

    Args:
        src: Source code to compress.

    Returns:
        ``(program, info)`` for the shortest generated program. On equal
        length the earliest generated candidate wins.
    """
    body, hoisted = _hoist_import(src)

    # One entry per compressor run: (deflate stream, method label, window code).
    streams: list[tuple[bytes, str, int]] = []

    for iterations in ZOPFLI_ITERS:
        zlib_stream: bytes = zopfli.zlib.compress(
            body,
            numiterations=iterations,
            blocksplitting=False,
        )
        label = f"zopfli(iters={iterations})"
        # Drop the first 2 and last 4 bytes (zlib framing) to keep only the
        # deflate payload.
        raw = zlib_stream[2:-4]
        # High nibble of the first header byte, plus 8, negated.
        header_window = -(((zlib_stream[0] >> 4) & 0x0F) + 8)
        streams.append((raw, label, -10))
        if header_window != -10:
            # NOTE(review): header_window is never positive, so the
            # `< 15` test always holds and this always yields -9 -- confirm
            # whether a different comparison was intended.
            alt_window = -9 if header_window < 15 else header_window
            streams.append((raw, label, alt_window))

    for level in LIBDEFLATE_LEVELS:
        packed = bytes(deflate.deflate_compress(body, compresslevel=level))
        streams.append((packed, f"libdeflate(level={level})", -10))

    for level in ZLIB_LEVELS:
        label = f"zlib(level={level})"
        for window in WINDOWS:
            # Window code -10 is compressed with wbits=-15.
            wbits = window if window != -10 else -15
            packed = zlib.compress(body, level=level, wbits=wbits)
            streams.append((packed, label, window))

    candidates: list[tuple[bytes, CompressionInfo]] = []
    for stream, label, window in streams:
        # The window argument depends only on the stream's window code.
        if window == -10:
            window_arg = b",~9"
        elif window == 15:
            window_arg = b""
        else:
            window_arg = b",%d" % window

        for delim in DELIMS:
            for use_reencode in (True, False):
                payload = reencode(stream, delim) if use_reencode else stream
                program = b"".join(
                    (
                        b"#coding:L1\nimport zlib",
                        hoisted,
                        b"\nexec(zlib.decompress(bytes(",
                        delim,
                        _sanitize(payload, delim),
                        delim,
                        b',"L1")',
                        window_arg,
                        b"))",
                    )
                )
                info = CompressionInfo(
                    method=label,
                    window=window,
                    delimiter=delim.decode(),
                    reencode=use_reencode,
                )
                candidates.append((program, info))

    return min(candidates, key=lambda pair: len(pair[0]))
def compress_frozen(src: bytes, huffman_hex: str) -> bytes:
    """Compress *src* against a Huffman description supplied as hex.

    A leading import is hoisted, the hex string is decoded and parsed with
    ``Huffman.parse``, and ``lz77`` is run once per delimiter in ``DELIMS``.
    Each result is wrapped with window code ``-10``.

    Args:
        src: Source code to compress.
        huffman_hex: Hex-encoded bytes understood by ``Huffman.parse``.

    Returns:
        The shortest wrapped program. On equal length the delimiter listed
        first in ``DELIMS`` wins.
    """
    body, hoisted = _hoist_import(src)
    table = Huffman.parse(bytes.fromhex(huffman_hex))

    programs = []
    for delim in DELIMS:
        stream = lz77(body, table, delim)
        programs.append(_wrap(stream, delim, hoisted, -10))
    return min(programs, key=len)