Skip to content

Commit ae66617

Browse files
committed
fix: enforce strict redirects for catalog downloads
1 parent e3f0153 commit ae66617

9 files changed

Lines changed: 463 additions & 53 deletions

File tree

src/specify_cli/__init__.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,11 @@ def extension_add(
10271027
from specify_cli._download_security import read_response_limited as _read_response_limited
10281028
from specify_cli.authentication.http import open_url as _open_url
10291029

1030-
with _open_url(from_url, timeout=60) as response:
1030+
with _open_url(
1031+
from_url,
1032+
timeout=60,
1033+
strict_redirects=True,
1034+
) as response:
10311035
zip_data = _read_response_limited(
10321036
response,
10331037
error_type=ExtensionError,
@@ -2479,6 +2483,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
24792483
# Try as URL (http/https)
24802484
if source.startswith("http://") or source.startswith("https://"):
24812485
from functools import partial
2486+
from urllib.parse import urlparse as _urlparse
24822487

24832488
from specify_cli._download_security import read_response_limited as _read_response_limited
24842489
from specify_cli.authentication.http import open_url as _open_url
@@ -2510,7 +2515,17 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
25102515
) as resp:
25112516
final_url = resp.geturl()
25122517
if not is_https_or_localhost_http(final_url):
2513-
console.print(f"[red]Error:[/red] URL redirected to non-HTTPS: {final_url}")
2518+
final_parsed = _urlparse(final_url)
2519+
if not final_parsed.hostname:
2520+
console.print(
2521+
f"[red]Error:[/red] URL redirected to a URL with no hostname: {final_url}"
2522+
)
2523+
else:
2524+
console.print(
2525+
"[red]Error:[/red] URL redirected to a URL without HTTPS "
2526+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
2527+
f"{final_url}"
2528+
)
25142529
raise typer.Exit(1)
25152530
with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp:
25162531
tmp.write(
@@ -2587,6 +2602,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
25872602

25882603
try:
25892604
from functools import partial
2605+
from urllib.parse import urlparse as _urlparse
25902606

25912607
from specify_cli.authentication.http import open_url as _open_url
25922608
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
@@ -2613,9 +2629,17 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
26132629
if workflow_dir.exists():
26142630
import shutil
26152631
shutil.rmtree(workflow_dir, ignore_errors=True)
2616-
console.print(
2617-
f"[red]Error:[/red] Workflow '{source}' redirected to non-HTTPS URL: {final_url}"
2618-
)
2632+
final_parsed = _urlparse(final_url)
2633+
if not final_parsed.hostname:
2634+
console.print(
2635+
f"[red]Error:[/red] Workflow '{source}' redirected to a URL with no hostname: {final_url}"
2636+
)
2637+
else:
2638+
console.print(
2639+
f"[red]Error:[/red] Workflow '{source}' redirected to a URL without HTTPS "
2640+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
2641+
f"{final_url}"
2642+
)
26192643
raise typer.Exit(1)
26202644
workflow_file.write_bytes(
26212645
_read_response_limited(
@@ -3052,20 +3076,28 @@ def workflow_step_add(
30523076
def _safe_fetch(url: str) -> bytes:
30533077
parsed = urlparse(url)
30543078
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
3055-
if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost):
3056-
raise ValueError(f"Refusing to fetch from non-HTTPS URL: {url}")
30573079
if not parsed.hostname:
30583080
raise ValueError(f"Refusing to fetch from URL with no hostname: {url}")
3059-
with _open_url(url, timeout=30) as resp:
3081+
if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost):
3082+
raise ValueError(
3083+
"Refusing to fetch from URL without HTTPS "
3084+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
3085+
f"{url}"
3086+
)
3087+
with _open_url(url, timeout=30, strict_redirects=True) as resp:
30603088
final_url = resp.geturl()
30613089
final_parsed = urlparse(final_url)
30623090
final_is_localhost = final_parsed.hostname in ("localhost", "127.0.0.1", "::1")
3091+
if not final_parsed.hostname:
3092+
raise ValueError(f"Redirect to URL with no hostname: {final_url}")
30633093
if final_parsed.scheme != "https" and not (
30643094
final_parsed.scheme == "http" and final_is_localhost
30653095
):
3066-
raise ValueError(f"Redirect to non-HTTPS URL: {final_url}")
3067-
if not final_parsed.hostname:
3068-
raise ValueError(f"Redirect to URL with no hostname: {final_url}")
3096+
raise ValueError(
3097+
"Redirect to URL without HTTPS "
3098+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
3099+
f"{final_url}"
3100+
)
30693101
return _read_response_limited(resp, label=f"workflow step {url}")
30703102

30713103
_validate_step_id_or_exit(step_id)

src/specify_cli/catalogs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,18 @@ def _entry(
6868

6969
@classmethod
7070
def _validate_catalog_url(cls, url: str) -> None:
71-
"""Validate that a catalog URL uses HTTPS, except localhost HTTP."""
71+
"""Validate that a catalog URL uses HTTPS, except loopback HTTP."""
7272
from urllib.parse import urlparse
7373

7474
parsed = urlparse(url)
75+
if not parsed.hostname:
76+
raise cls._error("Catalog URL must be a valid URL with a host.")
7577
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
7678
if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost):
7779
raise cls._error(
7880
f"Catalog URL must use HTTPS (got {parsed.scheme}://). "
79-
"HTTP is only allowed for localhost."
81+
"HTTP is only allowed for localhost, 127.0.0.1, and ::1."
8082
)
81-
if not parsed.netloc:
82-
raise cls._error("Catalog URL must be a valid URL with a host.")
8383

8484
def _load_catalog_config(self, config_path: Path) -> list[CatalogEntry] | None:
8585
"""Load catalog stack configuration from a YAML file.

src/specify_cli/extensions.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1995,14 +1995,20 @@ def _open_url(
19951995
url: str,
19961996
timeout: int = 10,
19971997
extra_headers: Optional[Dict[str, str]] = None,
1998+
strict_redirects: bool = True,
19981999
):
19992000
"""Open a URL with provider-based auth, trying each configured provider.
20002001
20012002
Delegates to :func:`specify_cli.authentication.http.open_url`.
20022003
"""
20032004
from specify_cli.authentication.http import open_url
20042005

2005-
return open_url(url, timeout, extra_headers=extra_headers)
2006+
return open_url(
2007+
url,
2008+
timeout,
2009+
extra_headers=extra_headers,
2010+
strict_redirects=strict_redirects,
2011+
)
20062012

20072013
def _resolve_github_release_asset_api_url(
20082014
self,
@@ -2220,7 +2226,11 @@ def _fetch_single_catalog(
22202226

22212227
# Fetch from network
22222228
try:
2223-
with self._open_url(entry.url, timeout=10) as response:
2229+
with self._open_url(
2230+
entry.url,
2231+
timeout=10,
2232+
strict_redirects=True,
2233+
) as response:
22242234
catalog_data = json.loads(
22252235
read_response_limited(
22262236
response,
@@ -2404,7 +2414,11 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]:
24042414
try:
24052415
import urllib.error
24062416

2407-
with self._open_url(catalog_url, timeout=10) as response:
2417+
with self._open_url(
2418+
catalog_url,
2419+
timeout=10,
2420+
strict_redirects=True,
2421+
) as response:
24082422
catalog_data = json.loads(
24092423
read_response_limited(
24102424
response,
@@ -2562,10 +2576,16 @@ def download_extension(
25622576
from urllib.parse import urlparse
25632577

25642578
parsed = urlparse(download_url)
2579+
if not parsed.hostname:
2580+
raise ExtensionError(
2581+
f"Extension download URL must be a valid URL with a host: {download_url}"
2582+
)
25652583
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
25662584
if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost):
25672585
raise ExtensionError(
2568-
f"Extension download URL must use HTTPS: {download_url}"
2586+
"Extension download URL must use HTTPS "
2587+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
2588+
f"{download_url}"
25692589
)
25702590

25712591
# Determine target path
@@ -2586,7 +2606,10 @@ def download_extension(
25862606
# Download the ZIP file
25872607
try:
25882608
with self._open_url(
2589-
download_url, timeout=60, extra_headers=extra_headers
2609+
download_url,
2610+
timeout=60,
2611+
extra_headers=extra_headers,
2612+
strict_redirects=True,
25902613
) as response:
25912614
zip_data = read_response_limited(
25922615
response,

src/specify_cli/presets/__init__.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,7 @@ def __init__(self, project_root: Path):
18401840
self.cache_metadata_file = self.cache_dir / "catalog-metadata.json"
18411841

18421842
def _validate_catalog_url(self, url: str) -> None:
1843-
"""Validate that a catalog URL uses HTTPS (localhost HTTP allowed).
1843+
"""Validate that a catalog URL uses HTTPS (loopback HTTP allowed).
18441844
18451845
Args:
18461846
url: URL to validate
@@ -1851,17 +1851,17 @@ def _validate_catalog_url(self, url: str) -> None:
18511851
from urllib.parse import urlparse
18521852

18531853
parsed = urlparse(url)
1854+
if not parsed.hostname:
1855+
raise PresetValidationError(
1856+
"Catalog URL must be a valid URL with a host."
1857+
)
18541858
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
18551859
if parsed.scheme != "https" and not (
18561860
parsed.scheme == "http" and is_localhost
18571861
):
18581862
raise PresetValidationError(
18591863
f"Catalog URL must use HTTPS (got {parsed.scheme}://). "
1860-
"HTTP is only allowed for localhost."
1861-
)
1862-
if not parsed.netloc:
1863-
raise PresetValidationError(
1864-
"Catalog URL must be a valid URL with a host."
1864+
"HTTP is only allowed for localhost, 127.0.0.1, and ::1."
18651865
)
18661866

18671867
def _make_request(self, url: str):
@@ -1877,13 +1877,19 @@ def _open_url(
18771877
url: str,
18781878
timeout: int = 10,
18791879
extra_headers: Optional[Dict[str, str]] = None,
1880+
strict_redirects: bool = True,
18801881
):
18811882
"""Open a URL with provider-based auth, trying each configured provider.
18821883
18831884
Delegates to :func:`specify_cli.authentication.http.open_url`.
18841885
"""
18851886
from specify_cli.authentication.http import open_url
1886-
return open_url(url, timeout, extra_headers=extra_headers)
1887+
return open_url(
1888+
url,
1889+
timeout,
1890+
extra_headers=extra_headers,
1891+
strict_redirects=strict_redirects,
1892+
)
18871893

18881894
def _resolve_github_release_asset_api_url(
18891895
self,
@@ -2161,7 +2167,11 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool =
21612167
pass
21622168

21632169
try:
2164-
with self._open_url(entry.url, timeout=10) as response:
2170+
with self._open_url(
2171+
entry.url,
2172+
timeout=10,
2173+
strict_redirects=True,
2174+
) as response:
21652175
catalog_data = json.loads(
21662176
read_response_limited(
21672177
response,
@@ -2319,7 +2329,11 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]:
23192329
pass
23202330

23212331
try:
2322-
with self._open_url(catalog_url, timeout=10) as response:
2332+
with self._open_url(
2333+
catalog_url,
2334+
timeout=10,
2335+
strict_redirects=True,
2336+
) as response:
23232337
catalog_data = json.loads(
23242338
read_response_limited(
23252339
response,
@@ -2492,12 +2506,18 @@ def download_pack(
24922506
from urllib.parse import urlparse
24932507

24942508
parsed = urlparse(download_url)
2509+
if not parsed.hostname:
2510+
raise PresetError(
2511+
f"Preset download URL must be a valid URL with a host: {download_url}"
2512+
)
24952513
is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1")
24962514
if parsed.scheme != "https" and not (
24972515
parsed.scheme == "http" and is_localhost
24982516
):
24992517
raise PresetError(
2500-
f"Preset download URL must use HTTPS: {download_url}"
2518+
"Preset download URL must use HTTPS "
2519+
"(HTTP is allowed only for localhost, 127.0.0.1, and ::1): "
2520+
f"{download_url}"
25012521
)
25022522

25032523
if target_dir is None:
@@ -2515,7 +2535,12 @@ def download_pack(
25152535
extra_headers = {"Accept": "application/octet-stream"}
25162536

25172537
try:
2518-
with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response:
2538+
with self._open_url(
2539+
download_url,
2540+
timeout=60,
2541+
extra_headers=extra_headers,
2542+
strict_redirects=True,
2543+
) as response:
25192544
zip_data = read_response_limited(
25202545
response,
25212546
error_type=PresetError,

src/specify_cli/workflows/catalog.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def __init__(self, project_root: Path) -> None:
160160
# -- Catalog resolution -----------------------------------------------
161161

162162
def _validate_catalog_url(self, url: str) -> None:
163-
"""Validate that a catalog URL uses HTTPS (localhost HTTP allowed)."""
163+
"""Validate that a catalog URL uses HTTPS (loopback HTTP allowed)."""
164164
from urllib.parse import urlparse
165165

166166
parsed = urlparse(url)
@@ -776,15 +776,15 @@ def _is_cache_path_safe(self) -> bool:
776776
# -- Catalog resolution -----------------------------------------------
777777

778778
def _validate_catalog_url(self, url: str) -> None:
779-
"""Validate that a catalog URL uses HTTPS (localhost HTTP allowed)."""
780-
if not is_https_or_localhost_http(url):
781-
from urllib.parse import urlparse
779+
"""Validate that a catalog URL uses HTTPS (loopback HTTP allowed)."""
780+
from urllib.parse import urlparse
782781

783-
parsed = urlparse(url)
784-
if not parsed.hostname:
785-
raise StepValidationError(
786-
"Catalog URL must be a valid URL with a host."
787-
)
782+
parsed = urlparse(url)
783+
if not parsed.hostname:
784+
raise StepValidationError(
785+
"Catalog URL must be a valid URL with a host."
786+
)
787+
if not is_https_or_localhost_http(url):
788788
raise StepValidationError(
789789
f"Catalog URL must use HTTPS (got {parsed.scheme}://). "
790790
"HTTP is only allowed for localhost, 127.0.0.1, and ::1."

tests/integrations/test_integration_catalog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,7 @@ def test_add_catalog_accepts_numeric_string_priority(self, tmp_path, monkeypatch
10721072
("bad_url", "reason"),
10731073
[
10741074
("http://insecure.example.com/catalog.json", "HTTPS"),
1075-
(123, "HTTPS"),
1075+
(123, "valid URL with a host"),
10761076
],
10771077
)
10781078
def test_add_catalog_rejects_existing_entry_with_bad_url(

0 commit comments

Comments
 (0)