Skip to content

Commit 095b586

Browse files
PascalThuetclaude
andcommitted
fix(security): pin tight read bounds on JSON responses; cap actual ZIP bytes
Addresses the Copilot review on PR #2442 and the same pattern elsewhere. - safe_extract_zip(): track the cumulative bytes actually written and fail past max_total_bytes, so the total-size bound holds even if member headers understate file_size (the declared-total check alone could be evaded). Mirrors the existing per-member written guard — defense-in-depth consistency. - Pass an explicit max_bytes to read_response_limited() at every JSON call site instead of inheriting the 50 MiB archive/payload default: * MAX_JSON_METADATA_BYTES (1 MiB): Azure AD token, GitHub release metadata, and the existing latest-release fetch (migrated off an inline literal). * MAX_JSON_CATALOG_BYTES (8 MiB): preset, extension, workflow and integration catalog fetches. Binary/archive downloads keep the 50 MiB ceiling. Both ceilings are centralized as documented constants in _download_security.py. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 5aee553 commit 095b586

9 files changed

Lines changed: 94 additions & 7 deletions

File tree

src/specify_cli/_download_security.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,20 @@
1818
MAX_ZIP_MEMBER_BYTES = 10 * 1024 * 1024
1919
MAX_ZIP_TOTAL_BYTES = 50 * 1024 * 1024
2020
READ_CHUNK_SIZE = 1024 * 1024
21+
22+
# Tighter ceilings for responses that are read fully into memory and parsed as
23+
# JSON. The 50 MiB MAX_DOWNLOAD_BYTES default is sized for archive/payload
24+
# downloads; JSON responses are far smaller, so capping them close to their real
25+
# size shrinks the memory-DoS surface and keeps the "too large" error reachable
26+
# (rather than only triggering on tens of MiB). Pass the matching constant
27+
# explicitly at each JSON call site so the intended bound is pinned there.
28+
# * METADATA — fixed-shape single-object responses (an OAuth token, one
29+
# release's metadata): a few KiB in practice, 1 MiB is already generous.
30+
# * CATALOG — listings that grow with the number of published items. The
31+
# largest bundled catalog is ~130 KiB today, so 8 MiB leaves ~60x headroom
32+
# for growth while staying well under the download ceiling.
33+
MAX_JSON_METADATA_BYTES = 1 * 1024 * 1024
34+
MAX_JSON_CATALOG_BYTES = 8 * 1024 * 1024
2135
SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
2236

2337

@@ -263,6 +277,11 @@ def safe_extract_zip(
263277

264278
normalized_members.append((member, normalized_name))
265279

280+
# The loop above bounds the *declared* total via member.file_size, but a
281+
# crafted archive can understate those headers. Mirror the per-member
282+
# guard below with a cumulative count of the bytes actually written so
283+
# the total-size bound holds even when the headers lie.
284+
total_written = 0
266285
for member, normalized_name in normalized_members:
267286
member_path = target_dir / normalized_name
268287
if member.is_dir():
@@ -298,6 +317,13 @@ def safe_extract_zip(
298317
f"ZIP member {member.filename} exceeds maximum size "
299318
f"of {max_member_bytes} bytes",
300319
)
320+
total_written += len(chunk)
321+
if total_written > max_total_bytes:
322+
_raise(
323+
error_type,
324+
f"ZIP archive exceeds maximum uncompressed size "
325+
f"of {max_total_bytes} bytes",
326+
)
301327
dest.write(chunk)
302328
except (OSError, zipfile.BadZipFile, RuntimeError) as exc:
303329
_raise_from(

src/specify_cli/_github_http.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,10 @@ def resolve_github_release_asset_api_url(
121121
import json
122122
import urllib.error
123123

124-
from specify_cli._download_security import read_response_limited
124+
from specify_cli._download_security import (
125+
MAX_JSON_METADATA_BYTES,
126+
read_response_limited,
127+
)
125128

126129
parsed = urlparse(download_url)
127130
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]
@@ -152,7 +155,9 @@ def resolve_github_release_asset_api_url(
152155
with open_url_fn(release_url, timeout=timeout) as response:
153156
release_data = json.loads(
154157
read_response_limited(
155-
response, label=f"GitHub release metadata {release_url}"
158+
response,
159+
max_bytes=MAX_JSON_METADATA_BYTES,
160+
label=f"GitHub release metadata {release_url}",
156161
)
157162
)
158163
# ValueError covers both an oversized body (raised by read_response_limited)

src/specify_cli/_version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from packaging.version import InvalidVersion, Version
3030

3131
from ._console import console
32-
from ._download_security import read_response_limited
32+
from ._download_security import MAX_JSON_METADATA_BYTES, read_response_limited
3333

3434
GITHUB_API_LATEST = "https://api.github.com/repos/github/spec-kit/releases/latest"
3535
_RESOLUTION_FAILURE_OFFLINE = "offline or timeout"
@@ -123,7 +123,7 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]:
123123
payload = json.loads(
124124
read_response_limited(
125125
resp,
126-
max_bytes=1024 * 1024,
126+
max_bytes=MAX_JSON_METADATA_BYTES,
127127
label="GitHub latest release",
128128
).decode("utf-8")
129129
)

src/specify_cli/authentication/azure_devops.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,10 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None:
113113
headers={"Content-Type": "application/x-www-form-urlencoded"},
114114
)
115115
try:
116-
from specify_cli._download_security import read_response_limited
116+
from specify_cli._download_security import (
117+
MAX_JSON_METADATA_BYTES,
118+
read_response_limited,
119+
)
117120
from specify_cli.authentication.http import _StripAuthOnRedirect
118121

119122
# A 307/308 redirect preserves the POST body, which carries the
@@ -125,6 +128,7 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None:
125128
payload = _json.loads(
126129
read_response_limited(
127130
resp,
131+
max_bytes=MAX_JSON_METADATA_BYTES,
128132
error_type=_TokenResponseTooLarge,
129133
label="Azure DevOps token response",
130134
).decode("utf-8")

src/specify_cli/extensions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from packaging.specifiers import SpecifierSet, InvalidSpecifier
2626

2727
from ._download_security import (
28+
MAX_JSON_CATALOG_BYTES,
2829
read_response_limited,
2930
safe_extract_zip,
3031
verify_sha256,
@@ -2022,6 +2023,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False
20222023
catalog_data = json.loads(
20232024
read_response_limited(
20242025
response,
2026+
max_bytes=MAX_JSON_CATALOG_BYTES,
20252027
error_type=ExtensionError,
20262028
label=f"extension catalog {entry.url}",
20272029
)
@@ -2144,6 +2146,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]:
21442146
catalog_data = json.loads(
21452147
read_response_limited(
21462148
response,
2149+
max_bytes=MAX_JSON_CATALOG_BYTES,
21472150
error_type=ExtensionError,
21482151
label=f"extension catalog {catalog_url}",
21492152
)

src/specify_cli/integrations/catalog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import yaml
2222
from packaging import version as pkg_version
2323

24-
from .._download_security import read_response_limited
24+
from .._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited
2525
from ..catalogs import CatalogEntry, CatalogStackBase
2626

2727

@@ -174,6 +174,7 @@ def _fetch_single_catalog(
174174
catalog_data = json.loads(
175175
read_response_limited(
176176
resp,
177+
max_bytes=MAX_JSON_CATALOG_BYTES,
177178
error_type=IntegrationCatalogError,
178179
label=f"integration catalog {entry.url}",
179180
)

src/specify_cli/presets.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from packaging.specifiers import SpecifierSet, InvalidSpecifier
2828

2929
from ._download_security import (
30+
MAX_JSON_CATALOG_BYTES,
3031
read_response_limited,
3132
safe_extract_zip,
3233
verify_sha256,
@@ -2092,6 +2093,7 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool =
20922093
catalog_data = json.loads(
20932094
read_response_limited(
20942095
response,
2096+
max_bytes=MAX_JSON_CATALOG_BYTES,
20952097
error_type=PresetError,
20962098
label=f"preset catalog {entry.url}",
20972099
)
@@ -2191,6 +2193,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]:
21912193
catalog_data = json.loads(
21922194
read_response_limited(
21932195
response,
2196+
max_bytes=MAX_JSON_CATALOG_BYTES,
21942197
error_type=PresetError,
21952198
label=f"preset catalog {catalog_url}",
21962199
)

src/specify_cli/workflows/catalog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
import yaml
2121

22-
from specify_cli._download_security import read_response_limited
22+
from specify_cli._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited
2323

2424

2525
# ---------------------------------------------------------------------------
@@ -348,6 +348,7 @@ def _validate_catalog_url(url: str) -> None:
348348
data = json.loads(
349349
read_response_limited(
350350
resp,
351+
max_bytes=MAX_JSON_CATALOG_BYTES,
351352
error_type=WorkflowCatalogError,
352353
label="workflow catalog",
353354
).decode("utf-8")

tests/test_download_security.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,50 @@ def test_safe_extract_zip_rejects_total_uncompressed_size(tmp_path):
228228
safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=5)
229229

230230

231+
def test_safe_extract_zip_bounds_actual_written_bytes_when_headers_understate_size(
232+
tmp_path, monkeypatch
233+
):
234+
# Defense in depth: the pre-extraction check sums the *declared*
235+
# member.file_size values, which a crafted archive can understate so that
236+
# check passes. If the ZIP reader then yields more bytes than the header
237+
# promised, the extraction loop must still abort once the cumulative bytes
238+
# actually written exceed max_total_bytes. CPython's own zipfile happens to
239+
# bound member reads to file_size and CRC-check them, so we substitute a
240+
# reader that does not — exercising our guard rather than the stdlib's.
241+
zip_path = tmp_path / "liar.zip"
242+
with zipfile.ZipFile(zip_path, "w") as zf:
243+
zf.writestr("a.txt", "") # declared file_size 0 ⇒ declared total stays 0
244+
zf.writestr("b.txt", "")
245+
246+
class _OverreadingStream:
247+
"""A member reader that yields more bytes than any header declared."""
248+
249+
def __init__(self, payload: bytes):
250+
self._remaining = payload
251+
252+
def read(self, size: int = -1) -> bytes:
253+
if size is None or size < 0:
254+
size = len(self._remaining)
255+
out, self._remaining = self._remaining[:size], self._remaining[size:]
256+
return out
257+
258+
def __enter__(self):
259+
return self
260+
261+
def __exit__(self, *exc):
262+
return False
263+
264+
# Each member streams 8 bytes despite declaring 0; the per-member cap (10 MiB
265+
# default) is untouched, so only the cumulative guard can stop this.
266+
monkeypatch.setattr(
267+
zipfile.ZipFile, "open", lambda self, *a, **k: _OverreadingStream(b"x" * 8)
268+
)
269+
270+
# 8 bytes for "a.txt" (total 8 ≤ 12), then "b.txt" busts the 12-byte ceiling.
271+
with pytest.raises(ValueError, match="maximum uncompressed size"):
272+
safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=12)
273+
274+
231275
def test_safe_extract_zip_wraps_bad_zip_file(tmp_path):
232276
zip_path = tmp_path / "bad.zip"
233277
zip_path.write_bytes(b"not a zip archive")

0 commit comments

Comments
 (0)