From 38a60b309be5080345588c09553562b2a8fc26c2 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Sun, 29 Mar 2026 22:56:10 +0100 Subject: [PATCH 1/5] [PR #12292/07a2ca47 backport][3.14] Adjust contributing-admins.rst (#12294) **This is a backport of PR #12292 as merged into master (07a2ca4791cff750960814e5eedda74a6021e8e3).** Co-authored-by: Sam Bull --- docs/contributing-admins.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/contributing-admins.rst b/docs/contributing-admins.rst index b17cbe1019a..749e1f87da1 100644 --- a/docs/contributing-admins.rst +++ b/docs/contributing-admins.rst @@ -35,9 +35,10 @@ first merge into the newer release branch (e.g. 3.8 into 3.9) and then to master #. Switch to target branch: e.g. ``git checkout 3.9 && git pull`` #. Start a merge: e.g. ``git merge 3.8 --no-commit --no-ff --gpg-sign`` #. Carefully review the changes and revert anything that should not be included (most - things outside the changelog). + things outside the changelog). Use ``git checkout 3.9 path/to/file`` to revert files + without aborting the merge. #. To ensure change fragments are cleaned up properly, run: ``python tools/cleanup_changes.py`` -#. Commit the merge (must be a normal merge commit, not squashed). +#. Complete the merge: ``git merge --continue``. #. Push the branch directly to Github (because a PR would get squashed). When pushing, you may get a rejected message. 
Follow these steps to resolve: From 374fb1dc29baa5e5b11a113ce1a5d0d04116f8e3 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 00:33:41 +0100 Subject: [PATCH 2/5] [PR #12290/cfcad08d backport][3.14] fix: Fix zstd decompression of multi-frame responses (#12298) **This is a backport of PR #12290 as merged into master (cfcad08dbd4c2c4247f505d9a34ff5c09586b42e).** Co-authored-by: josumoreno-BP <104622722+josumoreno-BP@users.noreply.github.com> --- CHANGES/12234.bugfix.rst | 2 + CONTRIBUTORS.txt | 1 + aiohttp/compression_utils.py | 29 ++++++++++++- tests/test_compression_utils.py | 56 ++++++++++++++++++++++++- tests/test_http_parser.py | 73 +++++++++++++++++++++++++++++++++ 5 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 CHANGES/12234.bugfix.rst diff --git a/CHANGES/12234.bugfix.rst b/CHANGES/12234.bugfix.rst new file mode 100644 index 00000000000..64bcfa24f69 --- /dev/null +++ b/CHANGES/12234.bugfix.rst @@ -0,0 +1,2 @@ +Fixed zstd decompression failing with ``ClientPayloadError`` when the server +sends a response as multiple zstd frames -- by :user:`josu-moreno`. 
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 755bcb7d1aa..27d04363b7f 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -207,6 +207,7 @@ Jordan Borean Josep Cugat Josh Junon Joshu Coats +Josu Moreno Julia Tsemusheva Julien Duponchelle Jungkook Park diff --git a/aiohttp/compression_utils.py b/aiohttp/compression_utils.py index 9fb595e9bb2..562b2813401 100644 --- a/aiohttp/compression_utils.py +++ b/aiohttp/compression_utils.py @@ -330,6 +330,7 @@ def __init__( "Please install `backports.zstd` module" ) self._obj = ZstdDecompressor() + self._pending_unused_data: bytes | None = None super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size) def decompress_sync( @@ -342,7 +343,33 @@ def decompress_sync( if max_length == ZLIB_MAX_LENGTH_UNLIMITED else max_length ) - return self._obj.decompress(data, zstd_max_length) + if self._pending_unused_data is not None: + data = self._pending_unused_data + data + self._pending_unused_data = None + result = self._obj.decompress(data, zstd_max_length) + + # Handle multi-frame zstd streams. + # https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 + # ZstdDecompressor handles one frame only. When a frame ends, + # eof becomes True and any trailing data goes to unused_data. + # We create a fresh decompressor to continue with the next frame. + while self._obj.eof and self._obj.unused_data: + unused_data = self._obj.unused_data + self._obj = ZstdDecompressor() + if zstd_max_length != ZSTD_MAX_LENGTH_UNLIMITED: + zstd_max_length -= len(result) + if zstd_max_length <= 0: + self._pending_unused_data = unused_data + break + result += self._obj.decompress(unused_data, zstd_max_length) + + # Frame ended exactly at chunk boundary — no unused_data, but the + # next feed_data() call would fail on the spent decompressor. + # Prepare a fresh one for the next chunk. 
+ if self._obj.eof: + self._obj = ZstdDecompressor() + + return result def flush(self) -> bytes: return b"" diff --git a/tests/test_compression_utils.py b/tests/test_compression_utils.py index fdaf91b36a0..3362b8feed0 100644 --- a/tests/test_compression_utils.py +++ b/tests/test_compression_utils.py @@ -1,8 +1,23 @@ """Tests for compression utils.""" +import sys + import pytest -from aiohttp.compression_utils import ZLibBackend, ZLibCompressor, ZLibDecompressor +from aiohttp.compression_utils import ( + ZLibBackend, + ZLibCompressor, + ZLibDecompressor, + ZSTDDecompressor, +) + +try: + if sys.version_info >= (3, 14): + import compression.zstd as zstandard # noqa: I900 + else: + import backports.zstd as zstandard +except ImportError: # pragma: no cover + zstandard = None # type: ignore[assignment] @pytest.mark.usefixtures("parametrize_zlib_backend") @@ -33,3 +48,42 @@ async def test_compression_round_trip_in_event_loop() -> None: compressed_data = await compressor.compress(data) + compressor.flush() decompressed_data = await decompressor.decompress(compressed_data) assert data == decompressed_data + + +@pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") +def test_zstd_multi_frame_unlimited() -> None: + d = ZSTDDecompressor() + frame1 = zstandard.compress(b"AAAA") + frame2 = zstandard.compress(b"BBBB") + result = d.decompress_sync(frame1 + frame2) + assert result == b"AAAABBBB" + + +@pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") +def test_zstd_multi_frame_max_length_partial() -> None: + d = ZSTDDecompressor() + frame1 = zstandard.compress(b"AAAA") + frame2 = zstandard.compress(b"BBBB") + result = d.decompress_sync(frame1 + frame2, max_length=6) + assert result == b"AAAABB" + + +@pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") +def test_zstd_multi_frame_max_length_exhausted() -> None: + d = ZSTDDecompressor() + frame1 = zstandard.compress(b"AAAA") + frame2 = zstandard.compress(b"BBBB") + 
result = d.decompress_sync(frame1 + frame2, max_length=4) + assert result == b"AAAA" + + +@pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") +def test_zstd_multi_frame_max_length_exhausted_preserves_unused_data() -> None: + d = ZSTDDecompressor() + frame1 = zstandard.compress(b"AAAA") + frame2 = zstandard.compress(b"BBBB") + frame3 = zstandard.compress(b"CCCC") + result1 = d.decompress_sync(frame1 + frame2, max_length=4) + assert result1 == b"AAAA" + result2 = d.decompress_sync(frame3) + assert result2 == b"BBBBCCCC" diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 2e37e584310..0119005d70d 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -2025,6 +2025,79 @@ async def test_http_payload_zstandard(self, protocol: BaseProtocol) -> None: assert b"zstd data" == out._buffer[0] assert out.is_eof() + @pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") + async def test_http_payload_zstandard_multi_frame( + self, protocol: BaseProtocol + ) -> None: + frame1 = zstandard.compress(b"first") + frame2 = zstandard.compress(b"second") + payload = frame1 + frame2 + out = aiohttp.StreamReader(protocol, 2**16, loop=asyncio.get_running_loop()) + p = HttpPayloadParser( + out, + length=len(payload), + compression="zstd", + headers_parser=HeadersParser(), + ) + p.feed_data(payload) + assert b"firstsecond" == b"".join(out._buffer) + assert out.is_eof() + + @pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") + async def test_http_payload_zstandard_multi_frame_chunked( + self, protocol: BaseProtocol + ) -> None: + frame1 = zstandard.compress(b"chunk1") + frame2 = zstandard.compress(b"chunk2") + out = aiohttp.StreamReader(protocol, 2**16, loop=asyncio.get_running_loop()) + p = HttpPayloadParser( + out, + length=len(frame1) + len(frame2), + compression="zstd", + headers_parser=HeadersParser(), + ) + p.feed_data(frame1) + p.feed_data(frame2) + assert b"chunk1chunk2" == 
b"".join(out._buffer) + assert out.is_eof() + + @pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") + async def test_http_payload_zstandard_frame_split_mid_chunk( + self, protocol: BaseProtocol + ) -> None: + frame1 = zstandard.compress(b"AAAA") + frame2 = zstandard.compress(b"BBBB") + combined = frame1 + frame2 + split_point = len(frame1) + 3 # 3 bytes into frame2 + out = aiohttp.StreamReader(protocol, 2**16, loop=asyncio.get_running_loop()) + p = HttpPayloadParser( + out, + length=len(combined), + compression="zstd", + headers_parser=HeadersParser(), + ) + p.feed_data(combined[:split_point]) + p.feed_data(combined[split_point:]) + assert b"AAAABBBB" == b"".join(out._buffer) + assert out.is_eof() + + @pytest.mark.skipif(zstandard is None, reason="zstandard is not installed") + async def test_http_payload_zstandard_many_small_frames( + self, protocol: BaseProtocol + ) -> None: + parts = [f"part{i}".encode() for i in range(10)] + payload = b"".join(zstandard.compress(p) for p in parts) + out = aiohttp.StreamReader(protocol, 2**16, loop=asyncio.get_running_loop()) + p = HttpPayloadParser( + out, + length=len(payload), + compression="zstd", + headers_parser=HeadersParser(), + ) + p.feed_data(payload) + assert b"".join(parts) == b"".join(out._buffer) + assert out.is_eof() + class TestDeflateBuffer: async def test_feed_data(self, protocol: BaseProtocol) -> None: From e37bbb7d6657d96d284269768735e62e63a04057 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:31:26 +0000 Subject: [PATCH 3/5] [PR #12302/2dc02ee0 backport][3.14] Narrow singleton header rejection to security-critical headers (#12304) Co-authored-by: J. 
Nick Koston Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Fixes home-assistant/core#166956 Fixes https://github.com/getmoto/moto/issues/9930 Fixes #12301 Fixes https://github.com/catalyst-cooperative/pudl-archiver/issues/1059 --- CHANGES/12302.bugfix.rst | 3 ++ aiohttp/_http_parser.pyx | 27 ++++++----- aiohttp/http_parser.py | 40 ++++++++-------- tests/test_http_parser.py | 97 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 131 insertions(+), 36 deletions(-) create mode 100644 CHANGES/12302.bugfix.rst diff --git a/CHANGES/12302.bugfix.rst b/CHANGES/12302.bugfix.rst new file mode 100644 index 00000000000..fe9e8fbd624 --- /dev/null +++ b/CHANGES/12302.bugfix.rst @@ -0,0 +1,3 @@ +Skipped the duplicate singleton header check in lax mode (the default for response +parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons +are still enforced -- by :user:`bdraco`. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index d53550b1007..5da835bc642 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -71,8 +71,11 @@ cdef object StreamReader = _StreamReader cdef object DeflateBuffer = _DeflateBuffer cdef bytes EMPTY_BYTES = b"" -# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 -cdef tuple SINGLETON_HEADERS = ( +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. 
+cdef frozenset SINGLETON_HEADERS = frozenset({ hdrs.CONTENT_LENGTH, hdrs.CONTENT_LOCATION, hdrs.CONTENT_RANGE, @@ -83,7 +86,7 @@ cdef tuple SINGLETON_HEADERS = ( hdrs.SERVER, hdrs.TRANSFER_ENCODING, hdrs.USER_AGENT, -) +}) cdef inline object extend(object buf, const char* at, size_t length): cdef Py_ssize_t s @@ -304,6 +307,7 @@ cdef class HttpParser: size_t _max_headers bint _response_with_body bint _read_until_eof + bint _lax bint _started object _url @@ -311,6 +315,7 @@ cdef class HttpParser: str _path str _reason list _headers + set _seen_singletons list _raw_headers bint _upgraded list _messages @@ -377,6 +382,8 @@ cdef class HttpParser: self._upgraded = False self._auto_decompress = auto_decompress self._content_encoding = None + self._lax = False + self._seen_singletons = set() self._csettings.on_url = cb_on_url self._csettings.on_status = cb_on_status @@ -405,6 +412,10 @@ cdef class HttpParser: if "\x00" in value: raise InvalidHeader(self._raw_value) + if not self._lax and name in SINGLETON_HEADERS: + if name in self._seen_singletons: + raise BadHttpMessage(f"Duplicate '{name}' header found.") + self._seen_singletons.add(name) self._headers.append((name, value)) if len(self._headers) > self._max_headers: raise BadHttpMessage("Too many headers received") @@ -444,14 +455,6 @@ cdef class HttpParser: raw_headers = tuple(self._raw_headers) headers = CIMultiDictProxy(CIMultiDict(self._headers)) - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - bad_hdr = next( - (h for h in SINGLETON_HEADERS if len(headers.getall(h, ())) > 1), - None, - ) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - if self._cparser.type == cparser.HTTP_REQUEST: h_upg = headers.get("upgrade", "") allowed = upgrade and h_upg.isascii() and h_upg.lower() in ALLOWED_UPGRADES @@ -689,6 +692,7 @@ cdef class HttpResponseParser(HttpParser): cparser.llhttp_set_lenient_headers(self._cparser, 1) 
cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1) cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1) + self._lax = True cdef object _on_status_complete(self): if self._buf: @@ -702,6 +706,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: pyparser._started = True pyparser._headers = [] + pyparser._seen_singletons = set() pyparser._raw_headers = [] PyByteArray_Resize(pyparser._buf, 0) pyparser._path = None diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index d181eefca8d..161b474ac38 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -75,6 +75,26 @@ r"[\x00-\x08\x0a-\x1f\x7f]" ) +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. +# Lowercased for case-insensitive matching against wire names. +SINGLETON_HEADERS: Final[frozenset[str]] = frozenset( + { + "content-length", + "content-location", + "content-range", + "content-type", + "etag", + "host", + "max-forwards", + "server", + "transfer-encoding", + "user-agent", + } +) + class RawRequestMessage(NamedTuple): method: str @@ -204,6 +224,8 @@ def parse_headers( elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value): raise InvalidHeader(bvalue) + if not self._lax and name in headers and name.lower() in SINGLETON_HEADERS: + raise BadHttpMessage(f"Duplicate '{name}' header found.") headers.add(name, value) raw_headers.append((bname, bvalue)) @@ -517,24 +539,6 @@ def parse_headers( upgrade = False chunked = False - # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - singletons = ( - hdrs.CONTENT_LENGTH, - hdrs.CONTENT_LOCATION, - hdrs.CONTENT_RANGE, - hdrs.CONTENT_TYPE, - hdrs.ETAG, - hdrs.HOST, - hdrs.MAX_FORWARDS, - hdrs.SERVER, - hdrs.TRANSFER_ENCODING, - 
hdrs.USER_AGENT, - ) - bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - # keep-alive and protocol switching # RFC 9110 section 7.6.1 defines Connection as a comma-separated list. conn_values = headers.getall(hdrs.CONNECTION, ()) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 0119005d70d..16e0ab1f558 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -269,32 +269,76 @@ def test_content_length_transfer_encoding(parser: Any) -> None: "hdr", ( "Content-Length", + "Host", + "Transfer-Encoding", + ), +) +def test_duplicate_singleton_header_rejected( + parser: HttpRequestParser, hdr: str +) -> None: + val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr}: {val1}\r\n" + f"{hdr}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +@pytest.mark.parametrize( + "hdr", + ( "Content-Location", "Content-Range", "Content-Type", "ETag", - "Host", "Max-Forwards", "Server", - "Transfer-Encoding", "User-Agent", ), ) -def test_duplicate_singleton_header_rejected( +def test_duplicate_non_security_singleton_header_rejected_strict( parser: HttpRequestParser, hdr: str ) -> None: - val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + """Non-security singletons are rejected in strict mode (requests).""" text = ( f"GET /test HTTP/1.1\r\n" f"Host: example.com\r\n" - f"{hdr}: {val1}\r\n" - f"{hdr}: {val2}\r\n" - f"\r\n" + f"{hdr}: value1\r\n" + f"{hdr}: value2\r\n" + "\r\n" ).encode() with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): parser.feed_data(text) +@pytest.mark.parametrize( + "hdr", + ( + # Content-Length is excluded because llhttp rejects duplicates + # at the C level before our singleton 
check runs. + "Content-Location", + "Content-Range", + "Content-Type", + "ETag", + "Max-Forwards", + "Server", + "Transfer-Encoding", + "User-Agent", + ), +) +def test_duplicate_singleton_header_accepted_in_lax_mode( + response: HttpResponseParser, hdr: str +) -> None: + """All singleton duplicates are accepted in lax mode (response parser default).""" + text = (f"HTTP/1.1 200 OK\r\n{hdr}: value1\r\n{hdr}: value2\r\n\r\n").encode() + messages, upgrade, tail = response.feed_data(text) + assert len(messages) == 1 + + def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: text = ( b"GET /admin HTTP/1.1\r\n" @@ -306,6 +350,45 @@ def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: parser.feed_data(text) +@pytest.mark.parametrize( + ("hdr1", "hdr2"), + ( + ("content-length", "Content-Length"), + ("Content-Length", "content-length"), + ("transfer-encoding", "Transfer-Encoding"), + ("Transfer-Encoding", "transfer-encoding"), + ), +) +def test_duplicate_singleton_header_different_casing_rejected( + parser: HttpRequestParser, hdr1: str, hdr2: str +) -> None: + """Singleton check must be case-insensitive per RFC 9110.""" + val1, val2 = ("1", "2") if "content-length" in hdr1.lower() else ("v1", "v2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr1}: {val1}\r\n" + f"{hdr2}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +def test_duplicate_host_header_different_casing_rejected( + parser: HttpRequestParser, +) -> None: + """Duplicate Host with different casing must also be rejected.""" + text = ( + b"GET /test HTTP/1.1\r\n" + b"host: evil.example\r\n" + b"Host: good.example\r\n" + b"\r\n" + ) + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + def test_bad_chunked(parser: HttpRequestParser) -> None: """Test that invalid chunked encoding doesn't allow content-length to be 
used.""" text = ( From 53e2e6fc58b89c6185be7820bd2c9f40216b3000 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:34:37 -1000 Subject: [PATCH 4/5] [PR #12302/2dc02ee0 backport][3.13] Skip duplicate singleton header check in lax mode (#12303) Co-authored-by: J. Nick Koston Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Fixes home-assistant/core#166956 Fixes https://github.com/getmoto/moto/issues/9930 Fixes #12301 Fixes https://github.com/catalyst-cooperative/pudl-archiver/issues/1059 --- CHANGES/12302.bugfix.rst | 3 ++ aiohttp/_http_parser.pyx | 27 ++++++----- aiohttp/http_parser.py | 40 ++++++++-------- tests/test_http_parser.py | 97 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 131 insertions(+), 36 deletions(-) create mode 100644 CHANGES/12302.bugfix.rst diff --git a/CHANGES/12302.bugfix.rst b/CHANGES/12302.bugfix.rst new file mode 100644 index 00000000000..fe9e8fbd624 --- /dev/null +++ b/CHANGES/12302.bugfix.rst @@ -0,0 +1,3 @@ +Skipped the duplicate singleton header check in lax mode (the default for response +parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons +are still enforced -- by :user:`bdraco`. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index d53550b1007..5da835bc642 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -71,8 +71,11 @@ cdef object StreamReader = _StreamReader cdef object DeflateBuffer = _DeflateBuffer cdef bytes EMPTY_BYTES = b"" -# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 -cdef tuple SINGLETON_HEADERS = ( +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. 
+cdef frozenset SINGLETON_HEADERS = frozenset({ hdrs.CONTENT_LENGTH, hdrs.CONTENT_LOCATION, hdrs.CONTENT_RANGE, @@ -83,7 +86,7 @@ cdef tuple SINGLETON_HEADERS = ( hdrs.SERVER, hdrs.TRANSFER_ENCODING, hdrs.USER_AGENT, -) +}) cdef inline object extend(object buf, const char* at, size_t length): cdef Py_ssize_t s @@ -304,6 +307,7 @@ cdef class HttpParser: size_t _max_headers bint _response_with_body bint _read_until_eof + bint _lax bint _started object _url @@ -311,6 +315,7 @@ cdef class HttpParser: str _path str _reason list _headers + set _seen_singletons list _raw_headers bint _upgraded list _messages @@ -377,6 +382,8 @@ cdef class HttpParser: self._upgraded = False self._auto_decompress = auto_decompress self._content_encoding = None + self._lax = False + self._seen_singletons = set() self._csettings.on_url = cb_on_url self._csettings.on_status = cb_on_status @@ -405,6 +412,10 @@ cdef class HttpParser: if "\x00" in value: raise InvalidHeader(self._raw_value) + if not self._lax and name in SINGLETON_HEADERS: + if name in self._seen_singletons: + raise BadHttpMessage(f"Duplicate '{name}' header found.") + self._seen_singletons.add(name) self._headers.append((name, value)) if len(self._headers) > self._max_headers: raise BadHttpMessage("Too many headers received") @@ -444,14 +455,6 @@ cdef class HttpParser: raw_headers = tuple(self._raw_headers) headers = CIMultiDictProxy(CIMultiDict(self._headers)) - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - bad_hdr = next( - (h for h in SINGLETON_HEADERS if len(headers.getall(h, ())) > 1), - None, - ) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - if self._cparser.type == cparser.HTTP_REQUEST: h_upg = headers.get("upgrade", "") allowed = upgrade and h_upg.isascii() and h_upg.lower() in ALLOWED_UPGRADES @@ -689,6 +692,7 @@ cdef class HttpResponseParser(HttpParser): cparser.llhttp_set_lenient_headers(self._cparser, 1) 
cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1) cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1) + self._lax = True cdef object _on_status_complete(self): if self._buf: @@ -702,6 +706,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: pyparser._started = True pyparser._headers = [] + pyparser._seen_singletons = set() pyparser._raw_headers = [] PyByteArray_Resize(pyparser._buf, 0) pyparser._path = None diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 4889404bebc..6a471cd7996 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -89,6 +89,26 @@ DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. +# Lowercased for case-insensitive matching against wire names. 
+SINGLETON_HEADERS: Final[frozenset[str]] = frozenset( + { + "content-length", + "content-location", + "content-range", + "content-type", + "etag", + "host", + "max-forwards", + "server", + "transfer-encoding", + "user-agent", + } +) + class RawRequestMessage(NamedTuple): method: str @@ -218,6 +238,8 @@ def parse_headers( elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value): raise InvalidHeader(bvalue) + if not self._lax and name in headers and name.lower() in SINGLETON_HEADERS: + raise BadHttpMessage(f"Duplicate '{name}' header found.") headers.add(name, value) raw_headers.append((bname, bvalue)) @@ -531,24 +553,6 @@ def parse_headers( upgrade = False chunked = False - # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - singletons = ( - hdrs.CONTENT_LENGTH, - hdrs.CONTENT_LOCATION, - hdrs.CONTENT_RANGE, - hdrs.CONTENT_TYPE, - hdrs.ETAG, - hdrs.HOST, - hdrs.MAX_FORWARDS, - hdrs.SERVER, - hdrs.TRANSFER_ENCODING, - hdrs.USER_AGENT, - ) - bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - # keep-alive and protocol switching # RFC 9110 section 7.6.1 defines Connection as a comma-separated list. 
conn_values = headers.getall(hdrs.CONNECTION, ()) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index f3ab7e26d66..25604dbcc4d 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -269,32 +269,76 @@ def test_content_length_transfer_encoding(parser: Any) -> None: "hdr", ( "Content-Length", + "Host", + "Transfer-Encoding", + ), +) +def test_duplicate_singleton_header_rejected( + parser: HttpRequestParser, hdr: str +) -> None: + val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr}: {val1}\r\n" + f"{hdr}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +@pytest.mark.parametrize( + "hdr", + ( "Content-Location", "Content-Range", "Content-Type", "ETag", - "Host", "Max-Forwards", "Server", - "Transfer-Encoding", "User-Agent", ), ) -def test_duplicate_singleton_header_rejected( +def test_duplicate_non_security_singleton_header_rejected_strict( parser: HttpRequestParser, hdr: str ) -> None: - val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + """Non-security singletons are rejected in strict mode (requests).""" text = ( f"GET /test HTTP/1.1\r\n" f"Host: example.com\r\n" - f"{hdr}: {val1}\r\n" - f"{hdr}: {val2}\r\n" - f"\r\n" + f"{hdr}: value1\r\n" + f"{hdr}: value2\r\n" + "\r\n" ).encode() with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): parser.feed_data(text) +@pytest.mark.parametrize( + "hdr", + ( + # Content-Length is excluded because llhttp rejects duplicates + # at the C level before our singleton check runs. 
+ "Content-Location", + "Content-Range", + "Content-Type", + "ETag", + "Max-Forwards", + "Server", + "Transfer-Encoding", + "User-Agent", + ), +) +def test_duplicate_singleton_header_accepted_in_lax_mode( + response: HttpResponseParser, hdr: str +) -> None: + """All singleton duplicates are accepted in lax mode (response parser default).""" + text = (f"HTTP/1.1 200 OK\r\n{hdr}: value1\r\n{hdr}: value2\r\n\r\n").encode() + messages, upgrade, tail = response.feed_data(text) + assert len(messages) == 1 + + def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: text = ( b"GET /admin HTTP/1.1\r\n" @@ -306,6 +350,45 @@ def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: parser.feed_data(text) +@pytest.mark.parametrize( + ("hdr1", "hdr2"), + ( + ("content-length", "Content-Length"), + ("Content-Length", "content-length"), + ("transfer-encoding", "Transfer-Encoding"), + ("Transfer-Encoding", "transfer-encoding"), + ), +) +def test_duplicate_singleton_header_different_casing_rejected( + parser: HttpRequestParser, hdr1: str, hdr2: str +) -> None: + """Singleton check must be case-insensitive per RFC 9110.""" + val1, val2 = ("1", "2") if "content-length" in hdr1.lower() else ("v1", "v2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr1}: {val1}\r\n" + f"{hdr2}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +def test_duplicate_host_header_different_casing_rejected( + parser: HttpRequestParser, +) -> None: + """Duplicate Host with different casing must also be rejected.""" + text = ( + b"GET /test HTTP/1.1\r\n" + b"host: evil.example\r\n" + b"Host: good.example\r\n" + b"\r\n" + ) + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + def test_bad_chunked(parser: HttpRequestParser) -> None: """Test that invalid chunked encoding doesn't allow content-length to be used.""" 
text = ( From 1c1dfb0bb24f26c98d479c33dad99b7aa3ee74a1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 31 Mar 2026 10:43:13 -1000 Subject: [PATCH 5/5] Release 3.13.5 (#12305) --- CHANGES.rst | 20 ++++++++++++++++++++ CHANGES/12302.bugfix.rst | 3 --- aiohttp/__init__.py | 2 +- 3 files changed, 21 insertions(+), 4 deletions(-) delete mode 100644 CHANGES/12302.bugfix.rst diff --git a/CHANGES.rst b/CHANGES.rst index 156b34231c1..2a7638f50a3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,26 @@ .. towncrier release notes start +3.13.5 (2026-03-31) +=================== + +Bug fixes +--------- + +- Skipped the duplicate singleton header check in lax mode (the default for response + parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons + are still enforced -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`12302`. + + + + +---- + + 3.13.4 (2026-03-28) =================== diff --git a/CHANGES/12302.bugfix.rst b/CHANGES/12302.bugfix.rst deleted file mode 100644 index fe9e8fbd624..00000000000 --- a/CHANGES/12302.bugfix.rst +++ /dev/null @@ -1,3 +0,0 @@ -Skipped the duplicate singleton header check in lax mode (the default for response -parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons -are still enforced -- by :user:`bdraco`. diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 1a22b728e38..404f9267748 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.13.4" +__version__ = "3.13.5" from typing import TYPE_CHECKING, Tuple