From 0b64fd5f8854703eb47ed89a3b0d472a13d0d651 Mon Sep 17 00:00:00 2001 From: Denis Ledoux Date: Thu, 6 Nov 2025 17:42:34 +0100 Subject: [PATCH 1/5] email: correctly indent folded comments with at least one space --- Lib/email/_header_value_parser.py | 10 ++- .../test_email/test__header_value_parser.py | 70 +++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 46fef2048babe7..d42506a3e36fd4 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2941,7 +2941,10 @@ def _refold_parse_tree(parse_tree, *, policy): # the way encoded strings handle continuation lines, we need to # be prepared to encode any whitespace if the next line turns # out to start with an encoded word. - lines.append(newline + tstr) + line = newline + tstr + if line[0] not in WSP: + line = ' ' + line + lines.append(line) whitespace_accumulator = [] for char in lines[-1]: @@ -2977,7 +2980,10 @@ def _refold_parse_tree(parse_tree, *, policy): # We can't figure out how to wrap, it, so give up. newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): - lines.append(newline + tstr) + line = newline + tstr + if line[0] not in WSP: + line = ' ' + line + lines.append(line) else: # We can't fold it onto the next line either... lines[-1] += tstr diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 426ec4644e3096..cd013b068c2c0a 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3294,6 +3294,76 @@ def test_address_list_with_specials_in_long_quoted_string(self): with self.subTest(to=to): self._test(parser.get_address_list(to)[0], folded, policy=policy) + def test_address_list_with_long_unwrapable_comment(self): + policy = self.policy.clone(max_line_length=40) + cases = [ + # (to, folded) + + # 1. 
Unwrappable Comments + + # Entire line is <= 40 characters, 40 characters exactly + # No folding + ('spy@example.org(loremipsumdolorsitametc)', 'spy@example.org(loremipsumdolorsitametc)\n'), + ('(loremipsumdolorsitametc)spy@example.org', '(loremipsumdolorsitametc)spy@example.org\n'), + # Entire line is > 40 characters, 41 characters + # Folding triggered + ('spy@example.org(loremipsumdolorsitametco)','spy@example.org\n (loremipsumdolorsitametco)\n'), + ('(loremipsumdolorsitametco)spy@example.org', '(loremipsumdolorsitametco)spy@example.org\n'), + # Entire line is > 40 characters, 54 characters, `len(tstr) <= maxlen - len(lines[-1])` is `True` + # Folding triggered + # Comment part < 40 characthers, 39 characters, `len(tstr) + 1 <= maxlen` is `True` + # No attempt to fold the subpart + ('spy@example.org(loremipsumdolorsitametconsecteturadip)', + 'spy@example.org\n' + ' (loremipsumdolorsitametconsecteturadip)\n'), + ('(loremipsumdolorsitametconsecteturadip)spy@example.org', + '(loremipsumdolorsitametconsecteturadip)spy@example.org\n'), + # Entire line is > 40 characters, 55 characters, `len(tstr) <= maxlen - len(lines[-1])` is `True` + # Folding triggered + # Comment part >= 40 characters, 40 characters exactly, `len(tstr) + 1 <= maxlen` is `False` + # Attempt to fold the subpart + ('spy@example.org(loremipsumdolorsitametconsecteturadipi)', + 'spy@example.org\n' + ' (loremipsumdolorsitametconsecteturadipi)\n'), + ('(loremipsumdolorsitametconsecteturadipi)spy@example.org', + '(loremipsumdolorsitametconsecteturadipi)spy@example.org\n'), + + # 2. 
Wrappable comments + + # Entire line is <= 40 characters, 40 characters exactly + # No folding + ('spy@example.org(loremipsumd olorsitamet)', 'spy@example.org(loremipsumd olorsitamet)\n'), + ('(loremipsumd olorsitamet)spy@example.org', '(loremipsumd olorsitamet)spy@example.org\n'), + # Entire line is > 40 characters, 41 characters + # Folding triggered + # Comment part < 40 characters + ('spy@example.org(loremipsumd olorsitametc)', 'spy@example.org\n (loremipsumd olorsitametc)\n'), + ('(loremipsumd olorsitametc)spy@example.org', '(loremipsumd olorsitametc)spy@example.org\n'), + # Entire line is > 40 characters, 56 characters + # Folding triggered + # Comment part > 40 characters, 41 characters + ('spy@example.org(loremipsumd loremipsumdolorsitametconse)', 'spy@example.org(loremipsumd\n loremipsumdolorsitametconse)\n'), + ('(loremipsumd loremipsumdolorsitametconse)spy@example.org', '(loremipsumd\n loremipsumdolorsitametconse)spy@example.org\n'), + # Entire line is > 40 characters, 70 characters + # Folding triggered + # Comment part > 40 characters, 55 characters + # One word in the comment > 40 characters, 41 characters + ('spy@example.org(loremipsumd loremipsumdolorsitametconsecteturadipisci)', 'spy@example.org(loremipsumd\n loremipsumdolorsitametconsecteturadipisci)\n'), + ('(loremipsumd loremipsumdolorsitametconsecteturadipisci)spy@example.org', '(loremipsumd\n loremipsumdolorsitametconsecteturadipisci)spy@example.org\n'), + + # 3. 
Nested comments + + ('spy@example.org((loremipsumdolorsitametconsecteturadi))', 'spy@example.org(\n (loremipsumdolorsitametconsecteturadi))\n'), + ('spy@example.org((loremipsumdolorsitametconsecteturadip))', 'spy@example.org(\n (loremipsumdolorsitametconsecteturadip)\n )\n'), + ('spy@example.org((loremipsumdolorsitam)(loremipsumdolorsitam))', 'spy@example.org((loremipsumdolorsitam)\n (loremipsumdolorsitam))\n'), + ('spy@example.org((loremipsumdolorsitametc)(loremipsumdolorsitametc))', 'spy@example.org(\n (loremipsumdolorsitametc)\n (loremipsumdolorsitametc))\n'), + ('spy@example.org(loremipsumdolorsitametc(loremipsumdolorsitametc))', 'spy@example.org(loremipsumdolorsitametc\n (loremipsumdolorsitametc))\n'), + ('spy@example.org((loremipsumdolorsitametc)loremipsumdolorsitametc)', 'spy@example.org(\n (loremipsumdolorsitametc)\n loremipsumdolorsitametc)\n'), + ] + for (to, folded) in cases: + with self.subTest(to=to): + self._test(parser.get_address_list(to)[0], folded, policy=policy) + # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes # currently don't do the right thing. 
From 6142a37877b5bace7fdf44c23a73dc6272d74514 Mon Sep 17 00:00:00 2001 From: Denis Ledoux Date: Fri, 21 Nov 2025 16:35:57 +0100 Subject: [PATCH 2/5] email: use the same strategy as quoted-string to fold comments - No forced space to indent. - Use almost the same code as the `bare-quoted-string` block above, additionally handling nested comments and escaped parentheses. --- Lib/email/_header_value_parser.py | 23 ++++-- .../test_email/test__header_value_parser.py | 73 +++---------------- 2 files changed, 27 insertions(+), 69 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index d42506a3e36fd4..3d973fb0183c9a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -101,6 +101,12 @@ def make_quoted_pairs(value): return str(value).replace('\\', '\\\\').replace('"', '\\"') +def make_parenthesis_pairs(value): + """Escape parenthesis and backslash for use within a comment.""" + return str(value).replace('\\', '\\\\') \ + .replace('(', '\\(').replace(')', '\\)') + + def quote_string(value): escaped = make_quoted_pairs(value) return f'"{escaped}"' @@ -2941,10 +2947,7 @@ def _refold_parse_tree(parse_tree, *, policy): # the way encoded strings handle continuation lines, we need to # be prepared to encode any whitespace if the next line turns # out to start with an encoded word. 
- line = newline + tstr - if line[0] not in WSP: - line = ' ' + line - lines.append(line) + lines.append(newline + tstr) whitespace_accumulator = [] for char in lines[-1]: @@ -2966,6 +2969,13 @@ def _refold_parse_tree(parse_tree, *, policy): [ValueTerminal(make_quoted_pairs(p), 'ptext') for p in newparts] + [ValueTerminal('"', 'ptext')]) + if part.token_type == 'comment': + newparts = ( + [ValueTerminal('(', 'ptext')] + + [ValueTerminal(make_parenthesis_pairs(p), 'ptext') + if p.token_type == 'ptext' else p + for p in newparts] + + [ValueTerminal(')', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) @@ -2980,10 +2990,7 @@ def _refold_parse_tree(parse_tree, *, policy): # We can't figure out how to wrap, it, so give up. newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): - line = newline + tstr - if line[0] not in WSP: - line = ' ' + line - lines.append(line) + lines.append(newline + tstr) else: # We can't fold it onto the next line either... lines[-1] += tstr diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index cd013b068c2c0a..763aa8ae9167d8 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3298,67 +3298,18 @@ def test_address_list_with_long_unwrapable_comment(self): policy = self.policy.clone(max_line_length=40) cases = [ # (to, folded) - - # 1. 
Unwrappable Comments - - # Entire line is <= 40 characters, 40 characters exactly - # No folding - ('spy@example.org(loremipsumdolorsitametc)', 'spy@example.org(loremipsumdolorsitametc)\n'), - ('(loremipsumdolorsitametc)spy@example.org', '(loremipsumdolorsitametc)spy@example.org\n'), - # Entire line is > 40 characters, 41 characters - # Folding triggered - ('spy@example.org(loremipsumdolorsitametco)','spy@example.org\n (loremipsumdolorsitametco)\n'), - ('(loremipsumdolorsitametco)spy@example.org', '(loremipsumdolorsitametco)spy@example.org\n'), - # Entire line is > 40 characters, 54 characters, `len(tstr) <= maxlen - len(lines[-1])` is `True` - # Folding triggered - # Comment part < 40 characthers, 39 characters, `len(tstr) + 1 <= maxlen` is `True` - # No attempt to fold the subpart - ('spy@example.org(loremipsumdolorsitametconsecteturadip)', - 'spy@example.org\n' - ' (loremipsumdolorsitametconsecteturadip)\n'), - ('(loremipsumdolorsitametconsecteturadip)spy@example.org', - '(loremipsumdolorsitametconsecteturadip)spy@example.org\n'), - # Entire line is > 40 characters, 55 characters, `len(tstr) <= maxlen - len(lines[-1])` is `True` - # Folding triggered - # Comment part >= 40 characters, 40 characters exactly, `len(tstr) + 1 <= maxlen` is `False` - # Attempt to fold the subpart - ('spy@example.org(loremipsumdolorsitametconsecteturadipi)', - 'spy@example.org\n' - ' (loremipsumdolorsitametconsecteturadipi)\n'), - ('(loremipsumdolorsitametconsecteturadipi)spy@example.org', - '(loremipsumdolorsitametconsecteturadipi)spy@example.org\n'), - - # 2. 
Wrappable comments - - # Entire line is <= 40 characters, 40 characters exactly - # No folding - ('spy@example.org(loremipsumd olorsitamet)', 'spy@example.org(loremipsumd olorsitamet)\n'), - ('(loremipsumd olorsitamet)spy@example.org', '(loremipsumd olorsitamet)spy@example.org\n'), - # Entire line is > 40 characters, 41 characters - # Folding triggered - # Comment part < 40 characters - ('spy@example.org(loremipsumd olorsitametc)', 'spy@example.org\n (loremipsumd olorsitametc)\n'), - ('(loremipsumd olorsitametc)spy@example.org', '(loremipsumd olorsitametc)spy@example.org\n'), - # Entire line is > 40 characters, 56 characters - # Folding triggered - # Comment part > 40 characters, 41 characters - ('spy@example.org(loremipsumd loremipsumdolorsitametconse)', 'spy@example.org(loremipsumd\n loremipsumdolorsitametconse)\n'), - ('(loremipsumd loremipsumdolorsitametconse)spy@example.org', '(loremipsumd\n loremipsumdolorsitametconse)spy@example.org\n'), - # Entire line is > 40 characters, 70 characters - # Folding triggered - # Comment part > 40 characters, 55 characters - # One word in the comment > 40 characters, 41 characters - ('spy@example.org(loremipsumd loremipsumdolorsitametconsecteturadipisci)', 'spy@example.org(loremipsumd\n loremipsumdolorsitametconsecteturadipisci)\n'), - ('(loremipsumd loremipsumdolorsitametconsecteturadipisci)spy@example.org', '(loremipsumd\n loremipsumdolorsitametconsecteturadipisci)spy@example.org\n'), - - # 3. 
Nested comments - - ('spy@example.org((loremipsumdolorsitametconsecteturadi))', 'spy@example.org(\n (loremipsumdolorsitametconsecteturadi))\n'), - ('spy@example.org((loremipsumdolorsitametconsecteturadip))', 'spy@example.org(\n (loremipsumdolorsitametconsecteturadip)\n )\n'), - ('spy@example.org((loremipsumdolorsitam)(loremipsumdolorsitam))', 'spy@example.org((loremipsumdolorsitam)\n (loremipsumdolorsitam))\n'), - ('spy@example.org((loremipsumdolorsitametc)(loremipsumdolorsitametc))', 'spy@example.org(\n (loremipsumdolorsitametc)\n (loremipsumdolorsitametc))\n'), - ('spy@example.org(loremipsumdolorsitametc(loremipsumdolorsitametc))', 'spy@example.org(loremipsumdolorsitametc\n (loremipsumdolorsitametc))\n'), - ('spy@example.org((loremipsumdolorsitametc)loremipsumdolorsitametc)', 'spy@example.org(\n (loremipsumdolorsitametc)\n loremipsumdolorsitametc)\n'), + ('(loremipsumdolorsitametconsecteturadipi)', + '(loremipsumdolorsitametconsecteturadipi)\n'), + ('(loremipsumdolorsitametconsecteturadipi)', + '(loremipsumdolorsitametconsecteturadipi)\n'), + ('(loremipsum dolorsitametconsecteturadipi)', + '(loremipsum dolorsitametconsecteturadipi)\n'), + ('(loremipsum dolorsitametconsecteturadipi)', + '(loremipsum\n dolorsitametconsecteturadipi)\n'), + ('(Escaped \\( \\) chars \\\\ in comments stay escaped)', + '(Escaped \\( \\) chars \\\\ in comments stay\n escaped)\n'), + ('((loremipsum)(loremipsum)(loremipsum)(loremipsum))', + '((loremipsum)(loremipsum)(loremipsum)(loremipsum))\n'), ] for (to, folded) in cases: with self.subTest(to=to): From 7f0524444c0ae072d84e20d7ea7866f5fc76ac0b Mon Sep 17 00:00:00 2001 From: Denis Ledoux Date: Wed, 3 Dec 2025 11:12:26 +0100 Subject: [PATCH 3/5] email: do not fold WhiteSpaceTerminal not starting with a WSP e.g. 
with a 40-character maximum line length policy, the comment `((loremipsum)(loremipsum)(loremipsum)(loremipsum))` below should not be folded to `((loremipsum)(loremipsum)(loremipsum)\n(loremipsum))` but should instead stay `((loremipsum)(loremipsum)(loremipsum)(loremipsum))`, to avoid adding a new line that does not start with a space (i.e. has no indentation) --- Lib/email/_header_value_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 3d973fb0183c9a..172f9ef9e5f096 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -949,7 +949,7 @@ def value(self): return ' ' def startswith_fws(self): - return True + return self and self[0] in WSP class ValueTerminal(Terminal): From db74b26c97d7add03b5aaeac0e254cda78cab21e Mon Sep 17 00:00:00 2001 From: Denis Ledoux Date: Thu, 4 Dec 2025 11:05:31 +0100 Subject: [PATCH 4/5] email: test case for blank between nested comments Test that a whitespace between two nested comments leads to a new line with one space of indentation on folding --- Lib/test/test_email/test__header_value_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 763aa8ae9167d8..e28fe3892015b9 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3310,6 +3310,8 @@ def test_address_list_with_long_unwrapable_comment(self): '(Escaped \\( \\) chars \\\\ in comments stay\n escaped)\n'), ('((loremipsum)(loremipsum)(loremipsum)(loremipsum))', '((loremipsum)(loremipsum)(loremipsum)(loremipsum))\n'), + ('((loremipsum)(loremipsum)(loremipsum) (loremipsum))', + '((loremipsum)(loremipsum)(loremipsum)\n (loremipsum))\n'), ] for (to, folded) in cases: with self.subTest(to=to): From 8ad0fdb22f39f721409e5aa7b041d4f143b6d584 Mon Sep 17 00:00:00 2001 From: Denis Ledoux Date: Thu, 4 Dec 2025 11:37:07 
+0100 Subject: [PATCH 5/5] email: add a blurb --- .../Security/2026-01-16-14-40-31.gh-issue-143935.U2YtKl.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2026-01-16-14-40-31.gh-issue-143935.U2YtKl.rst diff --git a/Misc/NEWS.d/next/Security/2026-01-16-14-40-31.gh-issue-143935.U2YtKl.rst b/Misc/NEWS.d/next/Security/2026-01-16-14-40-31.gh-issue-143935.U2YtKl.rst new file mode 100644 index 00000000000000..c3d864936884ac --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-14-40-31.gh-issue-143935.U2YtKl.rst @@ -0,0 +1,6 @@ +Fixed a bug in the folding of comments when flattening an email message +using a modern email policy. Comments consisting of a very long sequence of +non-foldable characters could trigger a forced line wrap that omitted the +required leading space on the continuation line, causing the remainder of +the comment to be interpreted as a new header field. This enabled header +injection with carefully crafted inputs.