From d6b20ba6fdf6988d1fa57a34f8bcfb7234384dd0 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 23 Jun 2026 23:57:36 +0200 Subject: [PATCH 1/3] harden: bound HTTP reads and enforce strict redirects Add a shared _download_security module (read_response_limited, is_https_or_localhost_http, size constants) and route the GitHub release and Azure DevOps token network reads through bounded reads so an oversized response can't exhaust memory. Add a strict_redirects mode to authentication.open_url: the redirect handler now rejects any redirect whose target isn't HTTPS (or HTTP to localhost), composing with the existing per-hop redirect_validator and auth-stripping. The Azure DevOps token POST is routed through that handler so a 307/308 cannot forward the client_secret body to a non-HTTPS host. --- src/specify_cli/_download_security.py | 83 +++++++++++++++++++ src/specify_cli/_github_http.py | 18 +++- src/specify_cli/_version.py | 14 +++- .../authentication/azure_devops.py | 33 +++++++- src/specify_cli/authentication/http.py | 38 ++++++++- tests/http_helpers.py | 33 +++++++- tests/self_upgrade_helpers.py | 3 +- tests/test_authentication.py | 51 +++++++++--- tests/test_download_security.py | 68 +++++++++++++++ tests/test_github_http.py | 52 +++++++++++- tests/test_self_upgrade_detection.py | 1 + tests/test_self_upgrade_execution.py | 1 + tests/test_self_upgrade_verification.py | 1 + tests/test_upgrade.py | 61 +++++++++++++- 14 files changed, 425 insertions(+), 32 deletions(-) create mode 100644 src/specify_cli/_download_security.py create mode 100644 tests/test_download_security.py diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py new file mode 100644 index 0000000000..26287707c9 --- /dev/null +++ b/src/specify_cli/_download_security.py @@ -0,0 +1,83 @@ +"""Helpers for bounded HTTP downloads.""" + +from __future__ import annotations + +from typing import NoReturn, TypeVar +from urllib.parse import urlparse + + +ErrorT = TypeVar("ErrorT", bound=Exception) + +MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024 +READ_CHUNK_SIZE = 1024 * 1024 + +# Tighter ceiling for responses that are read fully into memory and parsed as +# JSON. The 50 MiB MAX_DOWNLOAD_BYTES default is sized for archive/payload +# downloads; JSON metadata responses are far smaller, so capping them close to +# their real size shrinks the memory-DoS surface and keeps the "too large" +# error reachable (rather than only triggering on tens of MiB). Pass it +# explicitly at each JSON call site so the intended bound is pinned there. +# METADATA covers fixed-shape single-object responses (an OAuth token, one +# release's metadata): a few KiB in practice, 1 MiB is already generous. +MAX_JSON_METADATA_BYTES = 1 * 1024 * 1024 + + +def is_https_or_localhost_http(url: str) -> bool: + """Return True if *url* is HTTPS, or HTTP limited to loopback hosts. + + Shared scheme-safety predicate used by the auth HTTP redirect handler and + by the direct URL validations in the CLI download flows, so the rule (and + any future tightening of it) lives in one place. + + A hostname is always required: a URL without one (e.g. ``https:///x``) + has no real target and is rejected regardless of scheme. + + The loopback allowance is a deliberate *exact-string* match on + ``localhost`` / ``127.0.0.1`` / ``::1``, not an IP-range check: other + loopback addresses (e.g. ``127.0.0.2``) are intentionally not covered. + ``urlparse`` already lower-cases the hostname, so the comparison is + case-insensitive. + """ + parsed = urlparse(url) + if not parsed.hostname: + return False + is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") + return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) + + +def _raise(error_type: type[ErrorT], message: str) -> NoReturn: + raise error_type(message) + + +def read_response_limited( + response, + *, + max_bytes: int = MAX_DOWNLOAD_BYTES, + error_type: type[ErrorT] = ValueError, + label: str = "download", +) -> bytes: + """Read at most *max_bytes* from a response object. + + ``response.read(n)`` is only guaranteed to return *up to* ``n`` bytes and may + return fewer even when more data is pending (e.g. chunked transfer encoding), + so a single ``read(max_bytes + 1)`` cannot enforce the bound on its own. Read + in a loop until EOF or until one byte past the limit has been accumulated. + + *max_bytes* is keyword-only. It defaults to the module-wide + ``MAX_DOWNLOAD_BYTES`` (50 MiB) ceiling for archive/payload downloads; + callers with a tighter budget (e.g. small JSON responses) should pass an + explicit value so the intended bound is pinned at the call site rather than + tracking changes to the shared default. + """ + chunks: list[bytes] = [] + total = 0 + limit = max_bytes + 1 + while total < limit: + chunk = response.read(min(READ_CHUNK_SIZE, limit - total)) + if not chunk: + break + chunks.append(chunk) + total += len(chunk) + if total > max_bytes: + _raise(error_type, f"{label} exceeds maximum size of {max_bytes} bytes") + return b"".join(chunks) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index d2030b57a8..e9a5f7a4b1 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -91,6 +91,11 @@ def resolve_github_release_asset_api_url( import json import urllib.error + from specify_cli._download_security import ( + MAX_JSON_METADATA_BYTES, + read_response_limited, + ) + parsed = urlparse(download_url) parts = [unquote(part) for part in parsed.path.strip("/").split("/")] @@ -118,8 +123,17 @@ def resolve_github_release_asset_api_url( try: with open_url_fn(release_url, timeout=timeout) as response: - release_data = json.loads(response.read()) - except (urllib.error.URLError, json.JSONDecodeError): + release_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_METADATA_BYTES, + label=f"GitHub release metadata {release_url}", + ) + ) + # ValueError covers both an oversized body (raised by read_response_limited) + # and json.JSONDecodeError (a ValueError subclass); on any of these, fall + # back to the original URL by returning None. + except (urllib.error.URLError, ValueError): return None for asset in release_data.get("assets", []): diff --git a/src/specify_cli/_version.py b/src/specify_cli/_version.py index e634a4f286..7720cf2ab6 100644 --- a/src/specify_cli/_version.py +++ b/src/specify_cli/_version.py @@ -4,8 +4,8 @@ release tag. The ``self_app`` Typer sub-command group is co-located here so all version-related logic lives in one place. -Dependencies: stdlib + packaging + ._console only (no other internal imports -at module level, keeping this layer thin and circular-import-safe). +Dependencies: stdlib + packaging + ._console + ._download_security only +(keeping this layer thin and circular-import-safe). """ from __future__ import annotations @@ -28,6 +28,7 @@ import typer from packaging.version import InvalidVersion, Version +from ._download_security import MAX_JSON_METADATA_BYTES, read_response_limited from ._console import console GITHUB_API_LATEST = "https://api.github.com/repos/github/spec-kit/releases/latest" @@ -118,8 +119,15 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]: GITHUB_API_LATEST, timeout=5, extra_headers={"Accept": "application/vnd.github+json"}, + strict_redirects=True, ) as resp: - payload = json.loads(resp.read().decode("utf-8")) + payload = json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_METADATA_BYTES, + label="GitHub latest release", + ).decode("utf-8") + ) tag = payload.get("tag_name") if not isinstance(tag, str) or not tag: raise ValueError("GitHub API response missing valid tag_name") diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index 5d71a1957b..72e25de92b 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -8,6 +8,7 @@ import subprocess from typing import TYPE_CHECKING +from .._download_security import MAX_JSON_METADATA_BYTES, read_response_limited from .base import AuthProvider if TYPE_CHECKING: @@ -17,6 +18,10 @@ _ADO_RESOURCE_ID = "499b84ac-1321-427f-aa17-267ca6975798" +class _TokenResponseTooLarge(Exception): + """Raised when an Azure AD token response exceeds the bounded read limit.""" + + class AzureDevOpsAuth(AuthProvider): """Azure DevOps authentication provider. @@ -109,9 +114,31 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: headers={"Content-Type": "application/x-www-form-urlencoded"}, ) try: - with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 - payload = _json.loads(resp.read().decode("utf-8")) + from specify_cli.authentication.http import _StripAuthOnRedirect + + # A 307/308 redirect preserves the POST body, which carries the + # client_secret. Reuse the package HTTPS-downgrade guard (empty host + # list means no auth header to strip, just the scheme check) so the + # secret can never be forwarded to a non-HTTPS, non-loopback host. + opener = urllib.request.build_opener(_StripAuthOnRedirect(())) + with opener.open(req, timeout=30) as resp: # noqa: S310 + payload = _json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_METADATA_BYTES, + error_type=_TokenResponseTooLarge, + label="Azure DevOps token response", + ).decode("utf-8") + ) token = payload.get("access_token", "").strip() return token or None - except (urllib.error.URLError, OSError, _json.JSONDecodeError, KeyError): + except ( + urllib.error.URLError, + OSError, + _json.JSONDecodeError, + _TokenResponseTooLarge, + ): + # Network failure, malformed JSON, or an oversized response — fall + # through to the next strategy. Unrelated programming errors (other + # ValueErrors, KeyErrors) intentionally propagate so they surface. return None diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index e8ab8c1241..1aeacce9f8 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -17,6 +17,7 @@ from typing import Callable from urllib.parse import urlparse +from .._download_security import is_https_or_localhost_http from . import get_provider from .config import AuthConfigEntry, _default_config_path, find_entries_for_url, load_auth_config @@ -60,8 +61,23 @@ def _hostname_in_hosts(hostname: str, hosts: tuple[str, ...]) -> bool: RedirectValidator = Callable[[str, str], None] +def _validate_strict_redirect(_old_url: str, new_url: str) -> None: + if not is_https_or_localhost_http(new_url): + raise urllib.error.URLError( + "unsafe redirect: target must use HTTPS with a hostname, " + "or HTTP for localhost (127.0.0.1, ::1)" + ) + + class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - """Drop ``Authorization`` when a redirect leaves trusted hosts or downgrades.""" + """Redirect handler that guards every redirect it is installed for. + + 1. Run any caller-provided redirect validator. + 2. Reject redirects that are not HTTPS with a hostname, except HTTP to + localhost / 127.0.0.1 / ::1 (the exact hosts allowed by + ``is_https_or_localhost_http``). + 3. Drop ``Authorization`` when a redirect leaves trusted hosts or downgrades. + """ def __init__( self, @@ -75,6 +91,7 @@ def __init__( def redirect_request(self, req, fp, code, msg, headers, newurl): if self._redirect_validator is not None: self._redirect_validator(req.full_url, newurl) + _validate_strict_redirect(req.full_url, newurl) original_auth = ( req.get_header("Authorization") @@ -123,6 +140,7 @@ def open_url( timeout: int = 10, extra_headers: dict[str, str] | None = None, redirect_validator: RedirectValidator | None = None, + strict_redirects: bool = False, ): """Open *url* with config-driven auth, redirect stripping, and fallthrough. @@ -135,9 +153,19 @@ def open_url( *extra_headers* (e.g. ``Accept``) are merged into every attempt. *redirect_validator*, when provided, is called with ``(old_url, new_url)`` before following each redirect and may raise to reject the redirect. + + Redirect scheme safety: every authenticated attempt goes through + ``_StripAuthOnRedirect``, which always rejects redirects to non-HTTPS + URLs (except HTTP to localhost / 127.0.0.1 / ::1, the hosts allowed by + ``is_https_or_localhost_http``). The unauthenticated fallback installs the + same handler when *strict_redirects* is true or *redirect_validator* is + supplied; without either, it follows redirects without that handler. """ entries = find_entries_for_url(url, _load_config()) + effective_redirect_validator = redirect_validator + use_redirect_handler = strict_redirects or effective_redirect_validator is not None + def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request: merged = {} if extra_headers: @@ -157,7 +185,7 @@ def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request: continue req = _make_req(provider.auth_headers(token, entry.auth)) - opener = urllib.request.build_opener(_StripAuthOnRedirect(entry.hosts, redirect_validator)) + opener = urllib.request.build_opener(_StripAuthOnRedirect(entry.hosts, effective_redirect_validator)) try: return opener.open(req, timeout=timeout) except urllib.error.HTTPError as exc: @@ -168,7 +196,9 @@ def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request: # No entry worked (or none matched) — unauthenticated fallback req = _make_req({}) - if redirect_validator is not None: - opener = urllib.request.build_opener(_StripAuthOnRedirect((), redirect_validator)) + if use_redirect_handler: + # No auth is attached on this path, so the handler's host list is empty: + # here it runs redirect validation only, not auth stripping. + opener = urllib.request.build_opener(_StripAuthOnRedirect((), effective_redirect_validator)) return opener.open(req, timeout=timeout) return urllib.request.urlopen(req, timeout=timeout) # noqa: S310 diff --git a/tests/http_helpers.py b/tests/http_helpers.py index 46e26806b4..00f549e397 100644 --- a/tests/http_helpers.py +++ b/tests/http_helpers.py @@ -1,15 +1,46 @@ """HTTP test helpers shared by version-related CLI tests.""" +import io import json +import urllib.request from unittest.mock import MagicMock +import pytest + def mock_urlopen_response(payload: dict) -> MagicMock: """Build a urlopen context-manager mock whose read returns JSON.""" body = json.dumps(payload).encode("utf-8") resp = MagicMock() - resp.read.return_value = body + resp.read.side_effect = io.BytesIO(body).read cm = MagicMock() cm.__enter__.return_value = resp cm.__exit__.return_value = False return cm + + +@pytest.fixture(autouse=True) +def route_opener_open_through_urlopen(monkeypatch): + """Route build_opener().open through urllib.request.urlopen. + + ``open_url(..., strict_redirects=True)`` fetches via + ``build_opener(...).open()``, which bypasses ``urllib.request.urlopen`` + — and with it the urlopen patches these test modules are built on. + Delegating ``open()`` to urlopen at call time keeps those patches + effective; the redirect handler's own behavior is covered by + ``TestRedirectStripping`` in test_authentication.py. + + Import this fixture into a test module to activate it there. + """ + + class _UrlopenDelegatingOpener: + def open(self, req, data=None, timeout=None): + if data is None: + return urllib.request.urlopen(req, timeout=timeout) + return urllib.request.urlopen(req, data=data, timeout=timeout) + + monkeypatch.setattr( + urllib.request, + "build_opener", + lambda *handlers: _UrlopenDelegatingOpener(), + ) diff --git a/tests/self_upgrade_helpers.py b/tests/self_upgrade_helpers.py index c363f57b13..fc0f339f92 100644 --- a/tests/self_upgrade_helpers.py +++ b/tests/self_upgrade_helpers.py @@ -18,7 +18,7 @@ _verify_upgrade, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import mock_urlopen_response, route_opener_open_through_urlopen __all__ = ( "SENTINEL_GH_TOKEN", @@ -31,6 +31,7 @@ "_verify_upgrade", "mock_urlopen_response", "requires_posix", + "route_opener_open_through_urlopen", "runner", "strip_ansi", ) diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 8b09245384..cce3ad9a7b 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -14,6 +14,7 @@ from __future__ import annotations import base64 +import io import json import os @@ -497,10 +498,15 @@ def test_resolve_token_azure_ad_success(self, monkeypatch): tenant_id="tid", client_id="cid", client_secret_env="MY_SECRET", ) mock_resp = MagicMock() - mock_resp.read.return_value = b'{"access_token": "ad-acquired-token"}' + mock_resp.read.side_effect = io.BytesIO(b'{"access_token": "ad-acquired-token"}').read mock_resp.__enter__ = lambda s: s mock_resp.__exit__ = MagicMock(return_value=False) - with patch("urllib.request.urlopen", return_value=mock_resp): + # The token request goes through a strict-redirect opener (so a 307/308 + # cannot forward the client_secret body to a non-HTTPS host), not bare + # urlopen; patch the opener it builds. + mock_opener = MagicMock() + mock_opener.open.return_value = mock_resp + with patch("urllib.request.build_opener", return_value=mock_opener): assert AzureDevOpsAuth().resolve_token(entry) == "ad-acquired-token" def test_resolve_token_azure_ad_missing_secret_returns_none(self, monkeypatch): @@ -793,17 +799,18 @@ def test_redirect_outside_hosts_strips_auth(self): assert new_req.headers.get("Authorization") is None assert new_req.unredirected_hdrs.get("Authorization") is None - def test_https_to_http_same_host_redirect_strips_auth(self): + def test_https_to_http_same_host_redirect_rejected(self): from specify_cli.authentication.http import _StripAuthOnRedirect from urllib.request import Request import io + import urllib.error + handler = _StripAuthOnRedirect(("github.com",)) req = Request("https://github.com/org/repo", headers={"Authorization": "Bearer tok"}) - new_req = handler.redirect_request(req, io.BytesIO(b""), 302, "Found", {}, - "http://github.com/org/repo") - assert new_req is not None - assert new_req.headers.get("Authorization") is None - assert new_req.unredirected_hdrs.get("Authorization") is None + + with pytest.raises(urllib.error.URLError, match="unsafe redirect"): + handler.redirect_request(req, io.BytesIO(b""), 302, "Found", {}, + "http://github.com/org/repo") def test_redirect_validator_can_reject_before_following_redirect(self): import urllib.error @@ -845,6 +852,18 @@ def test_multi_hop_redirect_within_hosts_preserves_auth(self): auth3 = req3.get_header("Authorization") or req3.unredirected_hdrs.get("Authorization") assert auth3 == "Bearer tok" + def test_redirect_rejects_https_downgrade(self): + """HTTPS downloads must not follow redirects to non-local HTTP URLs.""" + from specify_cli.authentication.http import _StripAuthOnRedirect + from urllib.request import Request + import io + import urllib.error + handler = _StripAuthOnRedirect(("example.com",)) + req = Request("https://example.com/archive.zip") + with pytest.raises(urllib.error.URLError, match="unsafe redirect"): + handler.redirect_request(req, io.BytesIO(b""), 302, "Found", {}, + "http://evil.example.com/archive.zip") + # --------------------------------------------------------------------------- # _fetch_latest_release_tag delegation @@ -864,7 +883,7 @@ def side_effect(req, timeout=None): captured["request"] = req body = _json.dumps({"tag_name": "v9.9.9"}).encode() resp = MagicMock() - resp.read.return_value = body + resp.read.side_effect = io.BytesIO(body).read cm = MagicMock() cm.__enter__.return_value = resp cm.__exit__.return_value = False @@ -884,19 +903,25 @@ def test_gh_token_forwarded_when_configured(self, monkeypatch): assert captured["request"].get_header("Authorization") == "Bearer forwarded-sentinel" def test_no_config_means_no_auth(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + # The release fetch uses strict_redirects=True, so the unauthenticated + # path goes through build_opener().open(), not urlopen. + mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Authorization") is None def test_accept_header_present(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Accept") == "application/vnd.github+json" diff --git a/tests/test_download_security.py b/tests/test_download_security.py new file mode 100644 index 0000000000..24eeedd959 --- /dev/null +++ b/tests/test_download_security.py @@ -0,0 +1,68 @@ +"""Tests for bounded HTTP download helpers.""" + +from __future__ import annotations + +import pytest + +from specify_cli._download_security import ( + is_https_or_localhost_http, + read_response_limited, +) + + +@pytest.mark.parametrize( + "url, allowed", + [ + ("https://example.com/preset.zip", True), + ("http://localhost:8000/preset.zip", True), + ("http://127.0.0.1/preset.zip", True), + ("http://[::1]/preset.zip", True), + # Non-loopback HTTP is rejected. + ("http://example.com/preset.zip", False), + # Loopback allowance is an exact-string match: 127.0.0.2 is not covered. + ("http://127.0.0.2/preset.zip", False), + # A hostname is always required, even for HTTPS. + ("https:///preset.zip", False), + ("https://", False), + ], +) +def test_is_https_or_localhost_http(url, allowed): + assert is_https_or_localhost_http(url) is allowed + + +class _Response: + """Faithful stream stand-in: read() advances a cursor and returns b"" at EOF.""" + + def __init__(self, data: bytes, *, chunk: int | None = None): + self.data = data + self.pos = 0 + # When set, never return more than *chunk* bytes per call even if more is + # requested - simulates short reads (e.g. chunked transfer encoding). + self.chunk = chunk + + def read(self, size: int = -1) -> bytes: + if size < 0: + size = len(self.data) - self.pos + if self.chunk is not None: + size = min(size, self.chunk) + out = self.data[self.pos : self.pos + size] + self.pos += len(out) + return out + + +def test_read_response_limited_rejects_oversized_download(): + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(_Response(b"abcde"), max_bytes=4) + + +def test_read_response_limited_returns_full_body_within_limit(): + assert read_response_limited(_Response(b"abcde"), max_bytes=10) == b"abcde" + + +def test_read_response_limited_enforces_bound_under_short_reads(): + # A server that streams more than max_bytes total while every read() returns + # fewer bytes than requested (chunked encoding) must still be rejected - a + # single read(max_bytes + 1) could be fooled, the accumulating loop cannot. + response = _Response(b"x" * 100, chunk=8) + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(response, max_bytes=16) diff --git a/tests/test_github_http.py b/tests/test_github_http.py index e258f4917f..0fb82b5b99 100644 --- a/tests/test_github_http.py +++ b/tests/test_github_http.py @@ -1,16 +1,20 @@ """Tests for GitHub-authenticated HTTP request helpers.""" +import io import json import os from contextlib import contextmanager from unittest.mock import MagicMock, patch +from urllib.request import Request import pytest from specify_cli._github_http import ( + GITHUB_HOSTS, build_github_request, resolve_github_release_asset_api_url, ) +from specify_cli.authentication.http import _StripAuthOnRedirect class TestBuildGitHubRequest: @@ -90,7 +94,7 @@ def _make_open_url_fn(self, release_json): @contextmanager def fake_open(url, timeout=None, extra_headers=None): resp = MagicMock() - resp.read.return_value = json.dumps(release_json).encode() + resp.read.side_effect = io.BytesIO(json.dumps(release_json).encode()).read yield resp return fake_open @@ -144,7 +148,7 @@ def test_returns_none_on_network_error(self): @contextmanager def failing_open(url, timeout=None, extra_headers=None): raise urllib.error.URLError("network error") - yield # noqa: unreachable + yield # pragma: no cover result = resolve_github_release_asset_api_url( "https://github.com/org/repo/releases/download/v1/pack.zip", @@ -160,7 +164,7 @@ def test_tag_with_special_characters_is_url_encoded(self): def capturing_open(url, timeout=None, extra_headers=None): captured_urls.append(url) resp = MagicMock() - resp.read.return_value = json.dumps({"assets": []}).encode() + resp.read.side_effect = io.BytesIO(json.dumps({"assets": []}).encode()).read yield resp resolve_github_release_asset_api_url( @@ -179,7 +183,7 @@ def test_tag_with_hash_is_url_encoded(self): def capturing_open(url, timeout=None, extra_headers=None): captured_urls.append(url) resp = MagicMock() - resp.read.return_value = json.dumps({"assets": []}).encode() + resp.read.side_effect = io.BytesIO(json.dumps({"assets": []}).encode()).read yield resp resolve_github_release_asset_api_url( @@ -188,3 +192,43 @@ def capturing_open(url, timeout=None, extra_headers=None): ) assert len(captured_urls) == 1 assert "releases/tags/v1%23beta" in captured_urls[0] + + +class TestGitHubRedirectAuth: + """Tests for GitHub-owned redirect auth handling.""" + + def test_multi_hop_github_redirect_preserves_unredirected_auth(self): + """Auth survives a multi-hop redirect chain within GitHub hosts.""" + handler = _StripAuthOnRedirect(tuple(GITHUB_HOSTS)) + req1 = Request( + "https://github.com/org/repo", + headers={"Authorization": "Bearer tok"}, + ) + + req2 = handler.redirect_request( + req1, + io.BytesIO(b""), + 302, + "Found", + {}, + "https://codeload.github.com/org/repo/zip", + ) + assert req2 is not None + auth2 = req2.get_header("Authorization") or req2.unredirected_hdrs.get( + "Authorization" + ) + assert auth2 == "Bearer tok" + + req3 = handler.redirect_request( + req2, + io.BytesIO(b""), + 302, + "Found", + {}, + "https://raw.githubusercontent.com/org/repo/main/file", + ) + assert req3 is not None + auth3 = req3.get_header("Authorization") or req3.unredirected_hdrs.get( + "Authorization" + ) + assert auth3 == "Bearer tok" diff --git a/tests/test_self_upgrade_detection.py b/tests/test_self_upgrade_detection.py index ab575e7435..73b55ebb79 100644 --- a/tests/test_self_upgrade_detection.py +++ b/tests/test_self_upgrade_detection.py @@ -13,6 +13,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _InstallMethod, _assemble_installer_argv, _completed_process, diff --git a/tests/test_self_upgrade_execution.py b/tests/test_self_upgrade_execution.py index 6696b4fc79..5c761014be 100644 --- a/tests/test_self_upgrade_execution.py +++ b/tests/test_self_upgrade_execution.py @@ -7,6 +7,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _completed_process, mock_urlopen_response, requires_posix, diff --git a/tests/test_self_upgrade_verification.py b/tests/test_self_upgrade_verification.py index f1a018f06c..c4e7eecf1b 100644 --- a/tests/test_self_upgrade_verification.py +++ b/tests/test_self_upgrade_verification.py @@ -8,6 +8,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) SENTINEL_GH_TOKEN, SENTINEL_GITHUB_TOKEN, _InstallMethod, diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index 3ad8c84f62..6a8b069b5c 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -9,6 +9,8 @@ `--disable-socket` as an extra safety net. """ +import io +import json import urllib.error import importlib.metadata from unittest.mock import MagicMock, patch @@ -17,6 +19,7 @@ from typer.testing import CliRunner from specify_cli import app +from specify_cli._download_security import read_response_limited as _real_read_response_limited from specify_cli._version import ( _fetch_latest_release_tag, _get_installed_version, @@ -24,7 +27,10 @@ _normalize_tag, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import ( + mock_urlopen_response, + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) +) runner = CliRunner() @@ -36,6 +42,19 @@ ) +def _mock_urlopen_response(payload: dict) -> MagicMock: + body = json.dumps(payload).encode("utf-8") + resp = MagicMock() + # Back read() with a real stream so it advances and returns b"" at EOF, + # matching http.client.HTTPResponse (a fixed return_value would loop forever + # under read_response_limited's bounded read loop). + resp.read.side_effect = io.BytesIO(body).read + cm = MagicMock() + cm.__enter__.return_value = resp + cm.__exit__.return_value = False + return cm + + def _http_error(code: int, message: str = "error") -> urllib.error.HTTPError: return urllib.error.HTTPError( url="https://api.github.com/repos/github/spec-kit/releases/latest", @@ -235,6 +254,46 @@ def test_generic_exception_propagates(self): _fetch_latest_release_tag() +class TestBoundedRead: + """Regression test for the read_response_limited hardening. + + A future refactor could silently revert `_fetch_latest_release_tag` to + `resp.read()` (the unbounded form) — this test pins the contract that + the response body is read through ``read_response_limited`` with a + bounded ``max_bytes``. + """ + + def test_response_body_is_bounded(self): + recorded: dict[str, int | str] = {} + + def _spy(response, *, max_bytes: int, label: str, **kwargs): + # max_bytes and label are keyword-only with no defaults: if the + # caller forgets to pass either, the call raises TypeError here + # (instead of recording a misleading None). + recorded["max_bytes"] = max_bytes + recorded["label"] = label + # Forward to the real implementation so the function under test + # still gets a parseable body. + return _real_read_response_limited( + response, max_bytes=max_bytes, label=label, **kwargs + ) + + with patch( + "specify_cli.authentication.http.urllib.request.urlopen", + return_value=_mock_urlopen_response({"tag_name": "v9.9.9"}), + ), patch("specify_cli._version.read_response_limited", side_effect=_spy): + tag, reason = _fetch_latest_release_tag() + + assert tag == "v9.9.9" + assert reason is None + # The cap (1 MiB) is a deliberate ceiling for the GitHub release + # JSON — keep it explicit so a future refactor that drops the + # `max_bytes=` argument fails this test instead of regressing + # silently to the default. + assert recorded["max_bytes"] == 1024 * 1024 + assert recorded["label"] == "GitHub latest release" + + _FAILURE_CASES = [ ("offline or timeout", urllib.error.URLError("down")), (_RATE_LIMITED_REASON, _http_error(403)), From f1ca6c48091d498d2200756e89f9223cc3fcd988 Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 24 Jun 2026 00:36:17 +0200 Subject: [PATCH 2/3] test: align HTTP fakes with bounded reads Assisted-by: Codex (model: GPT-5, autonomous) --- tests/test_extensions.py | 6 +++--- tests/test_presets.py | 15 ++++++++++----- tests/test_workflows.py | 27 +++++++++++++++++++++------ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index c60a7e430f..b4f00858c6 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -3670,7 +3670,7 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): zip_bytes = zip_buf.getvalue() release_response = MagicMock() - release_response.read.return_value = json.dumps( + release_response.read.side_effect = io.BytesIO(json.dumps( { "assets": [ { @@ -3679,12 +3679,12 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): } ] } - ).encode() + ).encode()).read release_response.__enter__ = lambda s: s release_response.__exit__ = MagicMock(return_value=False) asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) diff --git a/tests/test_presets.py b/tests/test_presets.py index 58574bbc9c..a559ae1dfb 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1972,7 +1972,7 @@ def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): zip_bytes = zip_buf.getvalue() release_response = MagicMock() - release_response.read.return_value = json.dumps( + release_response.read.side_effect = io.BytesIO(json.dumps( { "assets": [ { @@ -1981,12 +1981,12 @@ def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): } ] } - ).encode() + ).encode()).read release_response.__enter__ = lambda s: s release_response.__exit__ = MagicMock(return_value=False) asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) @@ -4577,9 +4577,14 @@ def test_preset_add_from_github_release_url_resolves_and_downloads(self, project class FakeResponse: def __init__(self, data): self._data = data + self._pos = 0 - def read(self): - return self._data + def read(self, size=-1): + if size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def __enter__(self): return self diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 512b354158..36c724adbb 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -5163,10 +5163,15 @@ def test_workflow_add_from_github_release_url_resolves_and_downloads(self, proje class FakeResponse: def __init__(self, data, url=None): self._data = data + self._pos = 0 self._url = url or "https://api.github.com/repos/org/repo/releases/assets/42" - def read(self): - return self._data + def read(self, size=-1): + if size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url @@ -5215,10 +5220,15 @@ def test_workflow_add_from_direct_api_asset_url_passes_through(self, project_dir class FakeResponse: def __init__(self, data, url=None): self._data = data + self._pos = 0 self._url = url or "https://api.github.com/repos/org/repo/releases/assets/42" - def read(self): - return self._data + def read(self, size=-1): + if size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url @@ -5258,10 +5268,15 @@ def test_workflow_add_catalog_based_resolves_github_release_url(self, project_di class FakeResponse: def __init__(self, data, url=None): self._data = data + self._pos = 0 self._url = url or "https://api.github.com/repos/org/repo/releases/assets/55" - def read(self): - return self._data + def read(self, size=-1): + if size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url From 885f8fcc75bcb4fc4626fe832c020fd4fcfebbfa Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 24 Jun 2026 15:51:45 +0200 Subject: [PATCH 3/3] fix: tolerate invalid token response encoding Assisted-by: Codex (model: GPT-5, autonomous) --- src/specify_cli/authentication/azure_devops.py | 1 + tests/test_authentication.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index 72e25de92b..2f0548e54b 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -136,6 +136,7 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: urllib.error.URLError, OSError, _json.JSONDecodeError, + UnicodeDecodeError, _TokenResponseTooLarge, ): # Network failure, malformed JSON, or an oversized response — fall diff --git a/tests/test_authentication.py b/tests/test_authentication.py index cce3ad9a7b..1756b84e8c 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -531,6 +531,24 @@ def test_resolve_token_azure_ad_network_error_returns_none(self, monkeypatch): side_effect=urllib.error.URLError("connection refused")): assert AzureDevOpsAuth().resolve_token(entry) is None + def test_resolve_token_azure_ad_invalid_utf8_returns_none(self, monkeypatch): + """azure-ad returns None when the token response is not valid UTF-8.""" + from unittest.mock import MagicMock, patch + monkeypatch.setenv("MY_SECRET", "secret-value") + entry = AuthConfigEntry( + hosts=("dev.azure.com",), provider="azure-devops", auth="azure-ad", + tenant_id="tid", client_id="cid", client_secret_env="MY_SECRET", + ) + mock_resp = MagicMock() + mock_resp.read.side_effect = io.BytesIO(b"\xff").read + mock_resp.__enter__ = lambda s: s + mock_resp.__exit__ = MagicMock(return_value=False) + mock_opener = MagicMock() + mock_opener.open.return_value = mock_resp + + with patch("urllib.request.build_opener", return_value=mock_opener): + assert AzureDevOpsAuth().resolve_token(entry) is None + # --------------------------------------------------------------------------- # open_url / build_request — positive tests