Skip to content

Commit 1add203

Browse files
authored
fix(extensions,presets,workflows): resolve private GHES release assets via /api/v3 (#3157)
* feat(auth): add github_provider_hosts() to enumerate GHES hosts from auth.json

  Assisted-by: Claude Code (model: claude-sonnet-4-6, autonomous)

* fix(extensions): resolve GHES release assets via /api/v3

  Generalizes resolve_github_release_asset_api_url to GitHub Enterprise Server hosts (gated by auth.json github hosts), fixing private GHES extension/preset downloads. #3147

  Assisted-by: Claude Code (model: claude-sonnet-4-6, autonomous)

* fix(extensions,presets): pass auth.json github hosts into release resolver

  Assisted-by: Claude Code (model: claude-sonnet-4-6, autonomous)

* docs(auth): document GHES private catalog + release-asset auth

  Assisted-by: Claude Code (model: claude-sonnet-4-6, autonomous)

* fix(presets,workflows): pass auth.json github hosts into remaining release resolvers

  Wires preset add --from and workflow add through github_provider_hosts() so private GHES release assets resolve via /api/v3 there too. #3147

  Assisted-by: Claude Code (model: claude-sonnet-4-6, autonomous)

* test(presets): use module-level io.BytesIO in GHES preset test

  Addresses Copilot review on PR #3157: drop unnecessary __import__("io") in test_preset_add_from_ghes_release_url_resolves_via_api_v3 since io is already imported at module level.

* fix(github-http): pass through GHES asset API URLs by path shape

  Addresses Copilot review on PR #3157. A direct GHES /api/v3 release asset URL was only returned as already-resolved when its host was in the allowlist; otherwise the resolver returned None and the caller downloaded the same URL without 'Accept: application/octet-stream', fetching JSON metadata instead of the binary. Gate the passthrough on path shape alone, mirroring the github.com case. This is safe: passthrough returns the input URL unchanged and the caller fetches it either way, so no new request to an arbitrary host is induced; the token stays independently gated by auth.json in open_url. The allowlist remains the anti-SSRF gate on the tag-lookup resolving path.

  Add test_passthrough_for_unlisted_ghes_api_asset_url.
1 parent 7624dd6 commit 1add203

12 files changed

Lines changed: 544 additions & 38 deletions

File tree

docs/reference/authentication.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,33 @@ Either `token` or `token_env` must be set for `bearer` and `basic-pat` schemes.
6969
}
7070
```
7171
72+
### GitHub Enterprise Server (GHES)
73+
74+
To use a private catalog or extension hosted on a GitHub Enterprise Server
75+
instance, add a `github` entry listing your GHES host(s). The same entry
76+
authenticates both catalog JSON fetches **and** private release-asset
77+
downloads — Specify recognizes the listed hosts as GitHub Enterprise and
78+
resolves release downloads through the GHES REST API (`/api/v3`).
79+
80+
```json
81+
{
82+
"providers": [
83+
{
84+
"hosts": ["ghes.example.com", "raw.ghes.example.com", "codeload.ghes.example.com"],
85+
"provider": "github",
86+
"auth": "bearer",
87+
"token_env": "GH_ENTERPRISE_TOKEN"
88+
}
89+
]
90+
}
91+
```
92+
93+
List the **bare** web host (e.g. `ghes.example.com`) — release-download URLs
94+
live there. If your instance uses subdomain isolation, also list the `raw.`
95+
and `codeload.` subdomains your catalog/extension URLs use. A
96+
`*.ghes.example.com` wildcard matches subdomains but **not** the bare host,
97+
so always include the bare host explicitly.
98+
7299
### Azure DevOps (`azure-devops`)
73100

74101
| Scheme | Header | Use for |

src/specify_cli/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,9 +1128,10 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
11281128
raise typer.Exit(1)
11291129

11301130
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
1131+
from specify_cli.authentication.http import github_provider_hosts
11311132

11321133
_wf_url_extra_headers = None
1133-
_resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30)
1134+
_resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30, github_hosts=github_provider_hosts())
11341135
if _resolved_wf_url:
11351136
source = _resolved_wf_url
11361137
_wf_url_extra_headers = {"Accept": "application/octet-stream"}
@@ -1234,10 +1235,11 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
12341235

12351236
try:
12361237
from specify_cli.authentication.http import open_url as _open_url
1238+
from specify_cli.authentication.http import github_provider_hosts
12371239
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset
12381240

12391241
_wf_cat_extra_headers = None
1240-
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30)
1242+
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30, github_hosts=github_provider_hosts())
12411243
if _resolved_workflow_url:
12421244
workflow_url = _resolved_workflow_url
12431245
_wf_cat_extra_headers = {"Accept": "application/octet-stream"}

src/specify_cli/_github_http.py

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import os
1212
import urllib.request
13+
from fnmatch import fnmatch
1314
from typing import Callable, Dict, Optional
1415
from urllib.parse import quote, unquote, urlparse
1516

@@ -56,55 +57,79 @@ def build_github_request(url: str) -> urllib.request.Request:
5657
return urllib.request.Request(url, headers=headers)
5758

5859

60+
def _host_matches(hostname: str, patterns: tuple[str, ...]) -> bool:
61+
"""Return True when *hostname* matches a pattern (exact or ``*.suffix``)."""
62+
hostname = hostname.lower()
63+
return any(p == hostname or fnmatch(hostname, p) for p in patterns)
64+
65+
5966
def resolve_github_release_asset_api_url(
6067
download_url: str,
6168
open_url_fn: Callable,
6269
timeout: int = 60,
70+
github_hosts: tuple[str, ...] = (),
6371
) -> Optional[str]:
64-
"""Resolve a GitHub browser release URL to its REST API asset URL.
65-
66-
For private or SSO-protected repositories, browser release download
67-
URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
68-
redirect to an HTML/SSO page instead of delivering the file. This
69-
helper resolves such a URL to the matching GitHub REST API asset URL
70-
(``https://api.github.com/repos/…/releases/assets/<id>``), which can
71-
then be downloaded with ``Accept: application/octet-stream`` and an
72-
auth token to retrieve the actual file payload.
73-
74-
If *download_url* is already a REST API asset URL, it is returned
75-
as-is. Non-GitHub URLs and GitHub URLs that are not release-download
76-
URLs return ``None``. If the API lookup fails (e.g. network error or
77-
asset not found), ``None`` is returned so callers can fall back to the
78-
original URL.
72+
"""Resolve a GitHub release browser-download URL to its REST API asset URL.
73+
74+
Works for public ``github.com`` and for GitHub Enterprise Server (GHES)
75+
hosts. A host is treated as GHES when it matches one of *github_hosts*
76+
(exact hostname or ``*.suffix``) — supply the hosts the user has trusted
77+
under a ``github`` provider in ``auth.json``. This allowlist is the
78+
security gate: unlisted hosts never receive GHES API treatment, so a
79+
malicious catalog cannot induce an API request to an arbitrary host.
80+
81+
For a public URL the API base is ``https://api.github.com``; for a GHES
82+
host it is ``{scheme}://{host[:port]}/api/v3``. Returns the API asset URL
83+
(downloadable with ``Accept: application/octet-stream`` + a token), the
84+
input unchanged if it is already an API asset URL, or ``None`` when the
85+
URL is not a resolvable GitHub release download or the lookup fails.
7986
8087
Args:
8188
download_url: The URL to resolve.
8289
open_url_fn: A callable compatible with
83-
``specify_cli.authentication.http.open_url`` used to make the
84-
authenticated API request.
90+
``specify_cli.authentication.http.open_url`` used for the
91+
authenticated release-metadata lookup.
8592
timeout: Per-request timeout in seconds.
86-
87-
Returns:
88-
The resolved REST API asset URL, or ``None`` if resolution is not
89-
applicable or fails.
93+
github_hosts: Host patterns to treat as GitHub Enterprise Server.
9094
"""
9195
import json
9296
import urllib.error
9397

9498
parsed = urlparse(download_url)
99+
hostname = (parsed.hostname or "").lower()
95100
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]
96101

97-
# Already a REST API asset URL — use it directly
98-
if (
99-
parsed.hostname == "api.github.com"
100-
and len(parts) >= 6
101-
and parts[:1] == ["repos"]
102-
and parts[3:5] == ["releases", "assets"]
103-
):
102+
is_ghes = (
103+
bool(hostname)
104+
and hostname not in GITHUB_HOSTS
105+
and _host_matches(hostname, github_hosts)
106+
)
107+
108+
def _is_asset_path(segments: list[str]) -> bool:
109+
return (
110+
len(segments) >= 6
111+
and segments[:1] == ["repos"]
112+
and segments[3:5] == ["releases", "assets"]
113+
)
114+
115+
# Already a REST API asset URL — use it directly. Pure passthrough induces
116+
# no new request: the caller fetches this same URL regardless, so it is
117+
# gated on path shape alone rather than the GHES allowlist. The token stays
118+
# independently gated by auth.json in the download helper, and only the
119+
# resolving path below (which issues a tag-lookup request) needs the
120+
# allowlist as its anti-SSRF gate.
121+
if hostname == "api.github.com" and _is_asset_path(parts):
122+
return download_url
123+
if hostname and parts[:2] == ["api", "v3"] and _is_asset_path(parts[2:]):
104124
return download_url
105125

106-
# Only handle github.com browser release download URLs
107-
if parsed.hostname != "github.com":
126+
# Determine the REST API base for browser release-download URLs.
127+
if hostname == "github.com":
128+
api_base = "https://api.github.com"
129+
elif is_ghes:
130+
authority = hostname if parsed.port is None else f"{hostname}:{parsed.port}"
131+
api_base = f"{parsed.scheme}://{authority}/api/v3"
132+
else:
108133
return None
109134

110135
# Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
@@ -114,7 +139,7 @@ def resolve_github_release_asset_api_url(
114139
owner, repo, tag = parts[0], parts[1], parts[4]
115140
asset_name = "/".join(parts[5:])
116141
encoded_tag = quote(tag, safe="")
117-
release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/tags/{encoded_tag}"
142+
release_url = f"{api_base}/repos/{owner}/{repo}/releases/tags/{encoded_tag}"
118143

119144
try:
120145
with open_url_fn(release_url, timeout=timeout) as response:

src/specify_cli/authentication/http.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,20 @@ def build_request(url: str, extra_headers: dict[str, str] | None = None) -> urll
118118
return urllib.request.Request(url, headers=headers)
119119

120120

121+
def github_provider_hosts() -> tuple[str, ...]:
    """Collect the host patterns of every ``github`` provider in ``auth.json``.

    The result tells the release-asset resolver which hosts should be
    classified as GitHub Enterprise Server instances. Hosts are returned in
    configuration order, entry by entry. An empty tuple is returned when no
    ``auth.json`` exists or it contains no ``github`` entries.
    """
    return tuple(
        host
        for entry in _load_config()
        if entry.provider == "github"
        for host in entry.hosts
    )
133+
134+
121135
def open_url(
122136
url: str,
123137
timeout: int = 10,

src/specify_cli/extensions/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,12 +2057,18 @@ def _resolve_github_release_asset_api_url(
20572057
) -> Optional[str]:
20582058
"""Resolve a GitHub release asset URL to its API asset URL.
20592059
2060-
Delegates to the shared helper in :mod:`specify_cli._github_http`.
2060+
Delegates to the shared helper in :mod:`specify_cli._github_http`,
2061+
passing the ``github`` provider hosts from ``auth.json`` so GitHub
2062+
Enterprise Server release assets resolve via ``/api/v3``.
20612063
"""
20622064
from specify_cli._github_http import resolve_github_release_asset_api_url
2065+
from specify_cli.authentication.http import github_provider_hosts
20632066

20642067
return resolve_github_release_asset_api_url(
2065-
download_url, self._open_url, timeout=timeout
2068+
download_url,
2069+
self._open_url,
2070+
timeout=timeout,
2071+
github_hosts=github_provider_hosts(),
20662072
)
20672073

20682074
def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:

src/specify_cli/presets/__init__.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,10 +1892,19 @@ def _resolve_github_release_asset_api_url(
18921892
download_url: str,
18931893
timeout: int = 60,
18941894
) -> Optional[str]:
1895-
"""Resolve a GitHub release asset URL to its REST API asset URL."""
1895+
"""Resolve a GitHub release asset URL to its REST API asset URL.
1896+
1897+
Passes the ``github`` provider hosts from ``auth.json`` so GitHub
1898+
Enterprise Server release assets resolve via ``/api/v3``.
1899+
"""
18961900
from specify_cli._github_http import resolve_github_release_asset_api_url
1901+
from specify_cli.authentication.http import github_provider_hosts
1902+
18971903
return resolve_github_release_asset_api_url(
1898-
download_url, self._open_url, timeout=timeout
1904+
download_url,
1905+
self._open_url,
1906+
timeout=timeout,
1907+
github_hosts=github_provider_hosts(),
18991908
)
19001909

19011910
def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:

src/specify_cli/presets/_commands.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,13 @@ def _validate_download_redirect(old_url, new_url):
144144
zip_path = Path(tmpdir) / "preset.zip"
145145
try:
146146
from specify_cli.authentication.http import open_url as _open_url
147+
from specify_cli.authentication.http import github_provider_hosts
147148
from specify_cli._github_http import resolve_github_release_asset_api_url
148149

149150
_preset_extra_headers = None
150-
_resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url)
151+
_resolved_from_url = resolve_github_release_asset_api_url(
152+
from_url, _open_url, github_hosts=github_provider_hosts()
153+
)
151154
if _resolved_from_url:
152155
from_url = _resolved_from_url
153156
_preset_extra_headers = {"Accept": "application/octet-stream"}

tests/test_authentication.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,3 +900,45 @@ def test_accept_header_present(self, monkeypatch):
900900
with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect):
901901
_fetch_latest_release_tag()
902902
assert captured["request"].get_header("Accept") == "application/vnd.github+json"
903+
904+
905+
# ---------------------------------------------------------------------------
906+
# github_provider_hosts
907+
# ---------------------------------------------------------------------------
908+
909+
910+
class TestGithubProviderHosts:
    """Tests for github_provider_hosts() — the GHES host allowlist source."""

    def _set_config(self, monkeypatch, entries):
        """Install *entries* as the auth configuration override for one test."""
        from specify_cli.authentication import http as _auth_http

        monkeypatch.setattr(_auth_http, "_config_override", entries)

    def test_returns_hosts_from_github_entries(self, monkeypatch):
        """Hosts of a single ``github`` entry come back in declared order."""
        from specify_cli.authentication.http import github_provider_hosts

        ghes_entry = AuthConfigEntry(
            hosts=("ghes.example", "raw.ghes.example"),
            provider="github",
            auth="bearer",
            token="t",
        )
        self._set_config(monkeypatch, [ghes_entry])

        assert github_provider_hosts() == ("ghes.example", "raw.ghes.example")

    def test_empty_when_no_config(self, monkeypatch):
        """An empty configuration yields an empty tuple."""
        from specify_cli.authentication.http import github_provider_hosts

        self._set_config(monkeypatch, [])

        assert github_provider_hosts() == ()

    def test_ignores_non_github_providers(self, monkeypatch):
        """Entries for other providers contribute no hosts."""
        from specify_cli.authentication.http import github_provider_hosts

        azure_entry = AuthConfigEntry(
            hosts=("dev.azure.com",),
            provider="azure-devops",
            auth="basic-pat",
            token="t",
        )
        self._set_config(monkeypatch, [azure_entry])

        assert github_provider_hosts() == ()

    def test_unions_multiple_github_entries(self, monkeypatch):
        """Hosts from several ``github`` entries are concatenated in order."""
        from specify_cli.authentication.http import github_provider_hosts

        entries = [
            AuthConfigEntry(hosts=(host,), provider="github", auth="bearer", token="t")
            for host in ("ghes.example", "github.com")
        ]
        self._set_config(monkeypatch, entries)

        assert github_provider_hosts() == ("ghes.example", "github.com")

tests/test_extensions.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import tempfile
1717
import shutil
1818
import tomllib
19+
from contextlib import contextmanager
1920
from pathlib import Path
2021
from datetime import datetime, timezone
22+
from unittest.mock import MagicMock
2123

2224
from tests.conftest import strip_ansi
2325
from specify_cli.extensions import (
@@ -7280,3 +7282,36 @@ def test_add_dev_force_reinstall(self, tmp_path):
72807282
)
72817283
assert result2.exit_code == 0, strip_ansi(result2.output)
72827284
assert "installed" in strip_ansi(result2.output)
7285+
7286+
7287+
def test_extension_wrapper_resolves_ghes_asset_when_host_configured(tmp_path, monkeypatch):
    """End-to-end wiring: auth.json github host → GHES asset resolution.

    With ``ghes.example`` configured as a ``github`` provider host, the
    catalog wrapper must look up the release by tag under ``/api/v3`` on that
    host and return the matching asset's API URL.
    """
    from specify_cli.authentication import http as _auth_http
    from specify_cli.authentication.config import AuthConfigEntry
    from specify_cli.extensions import ExtensionCatalog

    asset_api_url = "https://ghes.example/api/v3/repos/o/r/releases/assets/7"
    release_payload = json.dumps(
        {"assets": [{"name": "ext.zip", "url": asset_api_url}]}
    ).encode()

    ghes_entry = AuthConfigEntry(
        hosts=("ghes.example",),
        provider="github",
        auth="bearer",
        token="t",
    )
    monkeypatch.setattr(_auth_http, "_config_override", [ghes_entry])
    catalog = ExtensionCatalog(tmp_path)

    # Every URL the resolver requests is recorded so the lookup target can be
    # asserted afterwards.
    requested_urls = []

    @contextmanager
    def fake_open(url, timeout=None, extra_headers=None):
        requested_urls.append(url)
        response = MagicMock()
        response.read.return_value = release_payload
        yield response

    monkeypatch.setattr(catalog, "_open_url", fake_open)

    resolved = catalog._resolve_github_release_asset_api_url(
        "https://ghes.example/o/r/releases/download/v1/ext.zip"
    )

    assert resolved == asset_api_url
    assert requested_urls == ["https://ghes.example/api/v3/repos/o/r/releases/tags/v1"]

0 commit comments

Comments
 (0)