php · iliaal · Jun 7, 2026
diff --git a/mailparse_encoding.c b/mailparse_encoding.c
@@ -490,6 +490,180 @@ int mb_convert_filter_flush(mb_convert_filter *filter)
 	return 0;
 }
 
+/* =============================================================================
+ * Bulk (buffer-at-a-time) decoders
+ *
+ * These decode a whole input block in one tight loop, appending decoded bytes
+ * straight to the output smart_string. They reuse the filter's status/cache as
+ * the carry state between blocks, so the result is byte-for-byte identical to
+ * feeding the same bytes through mb_convert_filter_feed() one at a time -- but
+ * without a function-pointer call per input and per output byte, which is the
+ * dominant cost when decoding large base64/quoted-printable bodies. Only the
+ * decode directions used during MIME extraction are handled.
+ * ============================================================================= */
+/* Base64-decode in[0..len) onto out; filter->status/cache are loaded on entry and stored back on exit. */
+static void mb_base64_decode_block(mb_convert_filter *filter, const char *in, size_t len, smart_string *out)
+{
+	int status = filter->status;	/* number of sextets (0-3) already gathered for the current group */
+	int cache = filter->cache;	/* those sextets, packed from bit 23 downwards */
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		int c = (unsigned char) in[i];
+		int n;
+
+		if (c == 0x0d || c == 0x0a || c == 0x20 || c == 0x09 || c == 0x3d) {
+			continue;	/* CR, LF, SPACE, HTAB or '=': skipped, carry state is not touched */
+		}
+		if (c >= 0x41 && c <= 0x5a) {		/* A - Z  ->  0..25 */
+			n = c - 65;
+		} else if (c >= 0x61 && c <= 0x7a) {	/* a - z  ->  26..51 */
+			n = c - 71;
+		} else if (c >= 0x30 && c <= 0x39) {	/* 0 - 9  ->  52..61 */
+			n = c + 4;
+		} else if (c == 0x2b) {			/* '+' */
+			n = 62;
+		} else if (c == 0x2f) {			/* '/' */
+			n = 63;
+		} else {
+			continue;			/* any other byte is ignored, carry state is not touched */
+		}
+		n &= 0x3f;	/* keep the low six bits (every branch above already yields 0..63) */
+
+		switch (status) {
+			case 0:	/* first sextet: overwrites whatever cache held before */
+				status = 1;
+				cache = n << 18;
+				break;
+			case 1:	/* second sextet */
+				status = 2;
+				cache |= n << 12;
+				break;
+			case 2:	/* third sextet */
+				status = 3;
+				cache |= n << 6;
+				break;
+			default:	/* fourth sextet (status 3, or any value other than 0-2): emit three bytes */
+				status = 0;
+				n |= cache;	/* cache itself is left as is; case 0 replaces it on the next group */
+				smart_string_appendc(out, (n >> 16) & 0xff);
+				smart_string_appendc(out, (n >> 8) & 0xff);
+				smart_string_appendc(out, n & 0xff);
+				break;
+		}
+	}
+
+	filter->status = status;	/* save the partial group for the next block or the flush */
+	filter->cache = cache;
+}
+/* Append the whole bytes held in an unfinished base64 group to out and reset filter->status/cache to 0. */
+static void mb_base64_flush_block(mb_convert_filter *filter, smart_string *out)
+{
+	int status = filter->status;	/* snapshot taken before the reset below */
+	int cache = filter->cache;
+
+	filter->status = 0;
+	filter->cache = 0;
+
+	if (status >= 2) {	/* two sextets = 12 bits: one complete byte; status 0 or 1 emits nothing */
+		smart_string_appendc(out, (cache >> 16) & 0xff);
+		if (status >= 3) {	/* three sextets = 18 bits: a second complete byte */
+			smart_string_appendc(out, (cache >> 8) & 0xff);
+		}
+	}
+}
+/* Quoted-printable-decode in[0..len) onto out; filter->status/cache are loaded on entry and stored back on exit. */
+static void mb_qprint_decode_block(mb_convert_filter *filter, const char *in, size_t len, smart_string *out)
+{
+	int status = filter->status;	/* 1: after '='; 2: after '=' plus one accepted digit; 3: after '=' CR; else plain text */
+	int cache = filter->cache;	/* the raw first digit byte while status is 2 */
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		int c = (unsigned char) in[i];
+		int n, m;
+
+		switch (status) {
+			case 1:	/* previous byte was '=' */
+				if (hex2code_map[c] >= 0) {	/* table defined outside this block; used here as: >= 0 means c is a hex digit */
+					cache = c;
+					status = 2;
+				} else if (c == 0x0d) {		/* soft line feed, CR seen: an LF may follow */
+					status = 3;
+				} else if (c == 0x0a) {		/* soft line feed, bare LF: nothing is emitted */
+					status = 0;
+				} else {	/* not an escape after all: emit the '=' and this byte unchanged */
+					smart_string_appendc(out, 0x3d);	/* '=' */
+					smart_string_appendc(out, c);
+					status = 0;
+				}
+				break;
+			case 2:	/* '=' and one digit seen; cache holds that digit */
+				m = hex2code_map[c];
+				if (m < 0) {	/* second byte rejected: emit '=', the cached digit, then c itself */
+					smart_string_appendc(out, 0x3d);	/* '=' */
+					smart_string_appendc(out, cache);
+					n = c;
+				} else {	/* both digits accepted: combine the two table values into one byte */
+					n = hex2code_map[cache] << 4 | m;
+				}
+				smart_string_appendc(out, n);
+				status = 0;
+				break;
+			case 3:	/* "=" CR seen */
+				if (c != 0x0a) {		/* LF is swallowed; any other byte (even '=') is emitted as is */
+					smart_string_appendc(out, c);
+				}
+				status = 0;
+				break;
+			default:	/* plain text */
+				if (c == 0x3d) {		/* '=' opens an escape; it is not emitted yet */
+					status = 1;
+				} else {
+					smart_string_appendc(out, c);
+				}
+				break;
+		}
+	}
+
+	filter->status = status;	/* save the partial escape for the next block or the flush */
+	filter->cache = cache;
+}
+/* Append an escape left unfinished at end of input to out verbatim and reset filter->status/cache to 0. */
+static void mb_qprint_flush_block(mb_convert_filter *filter, smart_string *out)
+{
+	int status = filter->status;	/* snapshot taken before the reset below */
+	int cache = filter->cache;
+
+	filter->status = 0;
+	filter->cache = 0;
+
+	if (status == 1) {	/* input ended right after '=' */
+		smart_string_appendc(out, 0x3d);	/* '=' */
+	} else if (status == 2) {	/* input ended after '=' and one digit */
+		smart_string_appendc(out, 0x3d);	/* '=' */
+		smart_string_appendc(out, cache);
+	}	/* any other status (including 3, after "=" CR) emits nothing */
+}
+/* Decode in[0..len) onto out with the block decoder selected by filter->from->no_encoding. */
+void mb_convert_filter_feed_block(mb_convert_filter *filter, const char *in, size_t len, smart_string *out)
+{
+	if (filter->from->no_encoding == mb_no_encoding_base64) {
+		mb_base64_decode_block(filter, in, len, out);
+	} else if (filter->from->no_encoding == mb_no_encoding_qprint) {
+		mb_qprint_decode_block(filter, in, len, out);
+	}	/* any other source encoding: the input is dropped and nothing is appended */
+}
+/* Append the pending carry state to out and reset it, using the flush routine selected by filter->from->no_encoding. */
+void mb_convert_filter_flush_block(mb_convert_filter *filter, smart_string *out)
+{
+	if (filter->from->no_encoding == mb_no_encoding_base64) {
+		mb_base64_flush_block(filter, out);
+	} else if (filter->from->no_encoding == mb_no_encoding_qprint) {
+		mb_qprint_flush_block(filter, out);
+	}	/* any other source encoding: nothing is appended and the state is left alone */
+}
+
 /* =============================================================================
  * Encoding lookup functions
  * ============================================================================= */

diff --git a/mailparse_encoding.h b/mailparse_encoding.h
@@ -26,6 +26,7 @@
 #define MAILPARSE_ENCODING_H
 
 #include "php.h"
+#include "Zend/zend_smart_string.h"
 
 /* Encoding identifiers */
 enum mb_no_encoding {
@@ -92,6 +93,13 @@ void mb_convert_filter_delete(mb_convert_filter *filter);
 int mb_convert_filter_feed(int c, mb_convert_filter *filter);
 int mb_convert_filter_flush(mb_convert_filter *filter);
 
+/* Buffer-at-a-time decoders (BASE64 / Quoted-Printable -> 8bit), meant to give
+ * the same output as per-byte mb_convert_filter_feed() without its per-byte
+ * dispatch. Decoded bytes are appended to `out`; filter->status/cache carry a
+ * partial unit between calls. flush_block emits what is pending and resets. */
+void mb_convert_filter_feed_block(mb_convert_filter *filter, const char *in, size_t len, smart_string *out);
+void mb_convert_filter_flush_block(mb_convert_filter *filter, smart_string *out);
+
 const mb_encoding* mb_name2encoding(const char *name);
 const mb_encoding* mb_no2encoding(enum mb_no_encoding no_encoding);
 

diff --git a/php_mailparse_mime.c b/php_mailparse_mime.c
@@ -906,21 +906,6 @@ PHP_MAILPARSE_API php_mimepart *php_mimepart_find_child_by_position(php_mimepart
 	return NULL;
 }
 
-static int filter_into_work_buffer(int c, void *dat)
-{
-	php_mimepart *part = dat;
-
-	smart_string_appendc(&part->parsedata.workbuf, c);
-
-	if (part->parsedata.workbuf.len >= 4096) {
-
-		part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len);
-		part->parsedata.workbuf.len = 0;
-	}
-
-	return c;
-}
-
 PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_decode, php_mimepart_extract_func_t decoder, void *ptr)
 {
 	const mb_encoding *encoding;
@@ -950,7 +935,7 @@ PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_d
 		} else {
 			part->extract_filter = mb_convert_filter_new(
 					mb_no2encoding(from), mb_no2encoding(mb_no_encoding_8bit),
-					filter_into_work_buffer,
+					NULL,
 					NULL,
 					part
 					);
@@ -962,8 +947,9 @@ PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_d
 PHP_MAILPARSE_API void php_mimepart_decoder_finish(php_mimepart *part)
 {
 	if (part->extract_filter) {
-		mb_convert_filter_flush(part->extract_filter);
+		mb_convert_filter_flush_block(part->extract_filter, &part->parsedata.workbuf);
 		mb_convert_filter_delete(part->extract_filter);
+		part->extract_filter = NULL;
 	}
 	if (part->extract_func && part->parsedata.workbuf.len > 0) {
 		part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len);
@@ -974,15 +960,11 @@ PHP_MAILPARSE_API void php_mimepart_decoder_finish(php_mimepart *part)
 PHP_MAILPARSE_API int php_mimepart_decoder_feed(php_mimepart *part, const char *buf, size_t bufsize)
 {
 	if (buf && bufsize) {
-		size_t i;
-
 		if (part->extract_filter) {
-			for (i = 0; i < bufsize; i++) {
-				if (mb_convert_filter_feed(buf[i], part->extract_filter) < 0) {
-					zend_error(E_WARNING, "%s() - filter conversion failed. Input message is probably incorrectly encoded\n",
-							get_active_function_name());
-					return -1;
-				}
+			mb_convert_filter_feed_block(part->extract_filter, buf, bufsize, &part->parsedata.workbuf);
+			if (part->parsedata.workbuf.len >= MAILPARSE_BUFSIZ) {
+				part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len);
+				part->parsedata.workbuf.len = 0;
 			}
 		} else {
 			return part->extract_func(part, part->extract_context, buf, bufsize);