Skip to content

Commit 6fe0ab3

Browse files
committed
Fix Base45 dropping trailing bytes on non-ASCII input
The Base45 encoder and decoder iterated with range(0, len(text), step) while indexing into t = b(text). codext converts a bytes input to str (UTF-8) before the codec runs, so for any non-ASCII content b(text) is longer than text and len(text) stops the loop early, silently dropping the trailing byte(s). For example, encode(b'\xcf\xb1\x1b') returned 'OBQ' instead of 'OBQR0' and the value no longer round-tripped. Iterate up to len(t) (the length of the actual byte sequence) instead. Output now matches RFC 9285 and the reference base45 implementation, and encoding round-trips for arbitrary byte input.
1 parent 578f57d commit 6fe0ab3

2 files changed

Lines changed: 18 additions & 2 deletions

File tree

src/codext/base/base45.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ def base45_encode(mode):
3434
b45 = _get_charset(B45, mode)
3535
def encode(text, errors="strict"):
3636
t, s = b(text), ""
37-
for i in range(0, len(text), 2):
37+
# iterate over the byte sequence (t), not len(text): when the input
38+
# holds non-ASCII characters, b(text) is longer than text and using
39+
# len(text) silently drops the trailing bytes
40+
for i in range(0, len(t), 2):
3841
n = 256 * __ord(t[i])
3942
try:
4043
n += __ord(t[i+1])
@@ -54,7 +57,7 @@ def base45_decode(mode):
5457
def decode(text, errors="strict"):
5558
t, s = b(text), ""
5659
ehandler = handle_error("base45", errors, decode=True)
57-
for i in range(0, len(text), 3):
60+
for i in range(0, len(t), 3):
5861
try:
5962
n = b45[__chr(t[i])]
6063
except KeyError:

tests/test_base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,19 @@ def test_codec_base100(self):
211211
self.assertRaises(ValueError, codecs.decode, b(B100)[1:], "base100")
212212
self.assertIsNotNone(codecs.decode(b(B100) + b"\n", "base100", "ignore"))
213213

214+
def test_codec_base45(self):
215+
# RFC 9285 test vectors
216+
for s, b45 in [("AB", "BB8"), ("Hello!!", "%69 VD92EX0"), ("base-45", "UJCLQE7W581")]:
217+
self.assertEqual(codecs.encode(s, "base45"), b45)
218+
self.assertEqual(codecs.encode(b(s), "base45"), b(b45))
219+
self.assertEqual(codecs.decode(b45, "base45"), s)
220+
self.assertEqual(codecs.decode(b(b45), "base45"), b(s))
221+
# trailing bytes must not be dropped when the input contains non-ASCII bytes (byte length, not str length, drives encoding)
222+
self.assertEqual(codecs.encode(b"\xcf\xb1\x1b", "base45"), b"OBQR0")
223+
self.assertEqual(codecs.decode(b"OBQR0", "base45"), b"\xcf\xb1\x1b")
224+
for data in [b"\xff\xfe", b"hello", b"\x00", b"\x80\x81\x82\x83\x84"]:
225+
self.assertEqual(codecs.decode(codecs.encode(data, "base45"), "base45"), data)
226+
214227
def test_codec_base_generic(self):
215228
for n in range(2, 255):
216229
bn = "base{}_generic".format(n)

0 commit comments

Comments
 (0)