Skip to content

Commit 6a0bd2c

Browse files
PascalThuetclaude
andcommitted
fix: address follow-up Copilot review (error typing, docs, tests)
- __init__.py preset/extension URL installs: give read_response_limited a domain error_type (PresetError / ExtensionError) and catch that instead of a blanket ValueError, so an oversized body is reported cleanly while unrelated ValueErrors surface as real errors. The extension catch now also covers install_from_zip's ValidationError (an ExtensionError). - _utils.run_command: rewrite the misleading docstring — shell=False is the only honoured mode; shell=True is rejected with ValueError, the parameter is retained only so existing keyword callers don't hit TypeError. - _download_security: document that the loopback allowance is an exact-string match (not an IP-range check), that read_response_limited's max_bytes default is the 50 MiB ceiling (callers with tighter budgets should pass an explicit value), and how _safe_zip_name handles single trailing-slash directory markers vs malformed empty segments. - authentication/http: comment the empty-hosts _StripAuthOnRedirect use as the HTTPS-downgrade guard on the unauthenticated path. - check_security_requirements: document the HEAD^ fallback failing safe (audit anyway) on shallow / single-commit checkouts. - security.yml: document the universal committed snapshot vs per-Python scheduled compile distinction. - tests: add a regression test that a symlink alongside benign members is rejected with no partial extraction to disk. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 68bad0c commit 6a0bd2c

7 files changed

Lines changed: 69 additions & 4 deletions

File tree

.github/scripts/check_security_requirements.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ def _dependency_diff_refs() -> tuple[str, str]:
1818
head_ref = os.environ.get("DEPENDENCY_DIFF_HEAD", "").strip() or "HEAD"
1919
if base_ref and not set(base_ref) <= {"0"}:
2020
return base_ref, head_ref
21+
# Fallback when no usable base is supplied (push with an all-zero
22+
# ``github.event.before``, manual dispatch, etc.). ``HEAD^`` fails on a
23+
# shallow checkout or a single-commit repo; that ``git diff`` error is
24+
# caught by the caller and deliberately treated as "inputs changed" so the
25+
# audit runs anyway — failing safe (audit) rather than skipping silently.
2126
return "HEAD^", "HEAD"
2227

2328

.github/workflows/security.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ jobs:
3434
with:
3535
python-version: ${{ matrix.python-version }}
3636

37+
# The committed .github/security-audit-requirements.txt is generated with
38+
# --universal (resolves across all interpreters/platforms) and is what
39+
# push/PR runs audit. The scheduled job instead compiles per matrix
40+
# entry with --python-version so it can surface advisories in wheels that
41+
# only resolve on a specific interpreter (e.g. 3.11-only) — coverage the
42+
# universal file may not exercise. This broadening is intentional; PR runs
43+
# trade that depth for determinism against the committed snapshot.
3744
- name: Compile scheduled audit requirements
3845
if: ${{ github.event_name == 'schedule' }}
3946
run: |

src/specify_cli/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,10 +2114,15 @@ def preset_add(
21142114
zip_path.write_bytes(
21152115
read_response_limited(
21162116
response,
2117+
error_type=PresetError,
21172118
label=f"preset {from_url}",
21182119
)
21192120
)
2120-
except (urllib.error.URLError, ValueError) as e:
2121+
# The URL scheme is validated above, so the only failures here
2122+
# are network errors and an oversized body (raised as PresetError
2123+
# via error_type). Catching those specifically lets unrelated
2124+
# ValueErrors surface instead of masquerading as download errors.
2125+
except (urllib.error.URLError, PresetError) as e:
21212126
console.print(f"[red]Error:[/red] Failed to download: {e}")
21222127
raise typer.Exit(1)
21232128

@@ -3047,13 +3052,18 @@ def extension_add(
30473052
) as response:
30483053
zip_data = read_response_limited(
30493054
response,
3055+
error_type=ExtensionError,
30503056
label=f"extension {from_url}",
30513057
)
30523058
zip_path.write_bytes(zip_data)
30533059

30543060
# Install from downloaded ZIP
30553061
manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority)
3056-
except (urllib.error.URLError, ValueError) as e:
3062+
# ExtensionError covers an oversized body (via error_type) and the
3063+
# ValidationError/ExtensionError raised by install_from_zip; URL
3064+
# scheme is validated above. Catching these instead of a blanket
3065+
# ValueError lets unrelated ValueErrors surface as real errors.
3066+
except (urllib.error.URLError, ExtensionError) as e:
30573067
console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}")
30583068
raise typer.Exit(1)
30593069
finally:

src/specify_cli/_download_security.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ def is_https_or_localhost_http(url: str) -> bool:
2626
2727
Shared redirect-safety predicate used by the GitHub and auth HTTP redirect
2828
handlers so the rule (and any future tightening of it) lives in one place.
29+
30+
The loopback allowance is a deliberate *exact-string* match on
31+
``localhost`` / ``127.0.0.1`` / ``::1``, not an IP-range check: other
32+
loopback addresses (e.g. ``127.0.0.2``) are intentionally not covered.
33+
``urlparse`` already lower-cases the hostname, so the comparison is
34+
case-insensitive.
2935
"""
3036
parsed = urlparse(url)
3137
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
@@ -53,6 +59,12 @@ def read_response_limited(
5359
return fewer even when more data is pending (e.g. chunked transfer encoding),
5460
so a single ``read(max_bytes + 1)`` cannot enforce the bound on its own. Read
5561
in a loop until EOF or until one byte past the limit has been accumulated.
62+
63+
*max_bytes* is keyword-only. It defaults to the module-wide
64+
``MAX_DOWNLOAD_BYTES`` (50 MiB) ceiling for archive/payload downloads;
65+
callers with a tighter budget (e.g. small JSON responses) should pass an
66+
explicit value so the intended bound is pinned at the call site rather than
67+
tracking changes to the shared default.
5668
"""
5769
chunks: list[bytes] = []
5870
total = 0
@@ -111,6 +123,10 @@ def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str:
111123
normalized = name.replace("\\", "/")
112124
path = PurePosixPath(normalized)
113125
raw_parts = normalized.split("/")
126+
# Strip a single trailing empty segment, i.e. the one-slash directory
127+
# marker that legitimate ZIPs use ("mydir/", "mydir/subdir/"). Anything
128+
# else that produces an empty segment — consecutive slashes ("a//b") or a
129+
# second trailing slash — is left in place and rejected below as malformed.
114130
if raw_parts and raw_parts[-1] == "":
115131
raw_parts = raw_parts[:-1]
116132
has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None

src/specify_cli/_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ def run_command(
2424
) -> str | None:
2525
"""Run a command without invoking a shell and optionally capture output.
2626
27-
``shell`` remains accepted for public API compatibility, but shell
28-
execution is intentionally unsupported.
27+
The ``shell`` parameter is kept in the signature so existing keyword
28+
callers (and the re-export from ``specify_cli``) don't raise ``TypeError``,
29+
but only the default ``shell=False`` is honoured. ``shell=True`` is
30+
rejected with ``ValueError`` rather than silently ignored, so the
31+
unsupported mode fails loudly instead of running with a different meaning.
2932
"""
3033
if shell:
3134
raise ValueError(

src/specify_cli/authentication/http.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request:
161161
# No entry worked (or none matched) — unauthenticated fallback
162162
req = _make_req({})
163163
if strict_redirects:
164+
# No auth is attached on this path, so the handler's host list is empty:
165+
# here it acts purely as the HTTPS-downgrade guard (its redirect_request
166+
# rejects non-HTTPS, non-loopback targets), not as an auth-stripper.
164167
opener = urllib.request.build_opener(_StripAuthOnRedirect(()))
165168
return opener.open(req, timeout=timeout)
166169
return urllib.request.urlopen(req, timeout=timeout) # noqa: S310

tests/test_download_security.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,27 @@ def test_safe_extract_zip_rejects_symlinks(tmp_path):
177177
safe_extract_zip(zip_path, tmp_path / "out")
178178

179179

180+
def test_safe_extract_zip_rejects_symlink_without_partial_extraction(tmp_path):
181+
# A symlink sitting next to benign members must be rejected before ANY
182+
# file is written: validation runs over the whole member list first, so an
183+
# unsafe member cannot leak a partially-extracted tree to disk.
184+
zip_path = tmp_path / "mixed.zip"
185+
link = zipfile.ZipInfo("evil-link")
186+
link.external_attr = (stat.S_IFLNK | 0o777) << 16
187+
with zipfile.ZipFile(zip_path, "w") as zf:
188+
zf.writestr("safe/first.txt", "hello")
189+
zf.writestr(link, "target")
190+
zf.writestr("safe/second.txt", "world")
191+
192+
out_dir = tmp_path / "out"
193+
with pytest.raises(ValueError, match="Unsafe symlink"):
194+
safe_extract_zip(zip_path, out_dir)
195+
196+
# Nothing should have been written — not even the benign member that
197+
# precedes the symlink in the archive.
198+
assert not out_dir.exists() or not any(out_dir.rglob("*"))
199+
200+
180201
def test_safe_extract_zip_rejects_oversized_member(tmp_path):
181202
zip_path = tmp_path / "bad.zip"
182203
with zipfile.ZipFile(zip_path, "w") as zf:

0 commit comments

Comments
 (0)