From 5bfc674bd3192284ce3d1563484c8b4bdb47ac05 Mon Sep 17 00:00:00 2001 From: Aaryan Dadu Date: Fri, 19 Jun 2026 20:37:30 +0530 Subject: [PATCH] perf: ADO REST metadata fetcher to avoid clone-based fetch Azure DevOps marketplace sources fetched metadata (apm.yml, description, version) through the generic-git path, resolving files via a subprocess clone. GitHub and GitLab already have dedicated REST fetchers that avoid a clone for single-file metadata reads; ADO now matches that fast path. Add an `ado` source kind that routes to a new `_fetch_ado` via the existing _FETCHERS dispatch. It reads single files through the Azure DevOps Git items API (GET .../_apis/git/repositories/{repo}/items) and falls back to the generic-git clone on any REST/transport failure or offline condition, so there is no regression. A confirmed 404 returns None (file absent) without a wasted clone. Auth reuses AuthResolver's ADO context via try_with_fallback: ADO_APM_PAT as HTTP Basic with an AAD `az` bearer runtime fallback; the token is never logged or placed in the URL. ADO joins the JSON sidecar cache (1h TTL) for parity with the GitLab REST fetcher. --- src/apm_cli/commands/marketplace/__init__.py | 1 + src/apm_cli/marketplace/client.py | 174 +++++++- src/apm_cli/marketplace/models.py | 8 +- src/apm_cli/marketplace/resolver.py | 7 +- src/apm_cli/utils/github_host.py | 59 +++ .../integration/test_marketplace_ado_rest.py | 169 +++++++ tests/unit/marketplace/test_client_ado.py | 413 ++++++++++++++++++ tests/unit/marketplace/test_client_git.py | 11 +- tests/unit/test_github_host.py | 55 +++ 9 files changed, 880 insertions(+), 17 deletions(-) create mode 100644 tests/integration/test_marketplace_ado_rest.py create mode 100644 tests/unit/marketplace/test_client_ado.py diff --git a/src/apm_cli/commands/marketplace/__init__.py b/src/apm_cli/commands/marketplace/__init__.py index 5a830bec0..3b03cca9a 100644 --- a/src/apm_cli/commands/marketplace/__init__.py +++ b/src/apm_cli/commands/marketplace/__init__.py @@ -791,6 +791,7 @@ def _display_source_kind(kind: str, is_direct_url: bool) -> str: labels = { "github": "GitHub repository", "gitlab": "GitLab repository", + "ado": "Azure DevOps repository", "git": "generic git repository", "local": "local filesystem path", } diff --git a/src/apm_cli/marketplace/client.py b/src/apm_cli/marketplace/client.py index f7c9eb232..8ea0358e0 100644 --- a/src/apm_cli/marketplace/client.py +++ b/src/apm_cli/marketplace/client.py @@ -4,7 +4,10 @@ - ``github`` / ``gitlab`` -> host file API via ``_fetch_via_api`` (auth-routed through ``AuthResolver.try_with_fallback`` and the JSON sidecar cache). -- ``git`` -> generic git URL (ADO, Gitea, self-hosted, etc.) via subprocess +- ``ado`` -> Azure DevOps REST items API (``_fetch_ado``, auth-routed through + ``AuthResolver.try_with_fallback`` with the JSON sidecar cache), falling back + to the generic-git path on any REST/transport failure. +- ``git`` -> generic git URL (Gitea, self-hosted, etc.) via subprocess through ``GitCache``; ``git ls-remote`` is the freshness check, no JSON sidecar cache. - ``local`` -> bare repo (``git --git-dir=... show :``), working @@ -17,6 +20,7 @@ ``~/.apm/cache/marketplace/`` with a 1-hour TTL. """ +import base64 import contextlib import hashlib import json @@ -119,10 +123,12 @@ def _cache_key(source: MarketplaceSource) -> str: return f"url__{hashlib.sha256(source.url.encode()).hexdigest()[:16]}" if kind == "local": return f"local__{_sanitize_cache_name(source.name)}" - if kind == "git": - # Generic git: include host so a.com/o/r vs b.com/o/r never collapse. + if kind in ("git", "ado"): + # Generic git / ADO: include host so a.com/o/r vs b.com/o/r never + # collapse, and prefix by kind so the same host on the two paths keeps + # distinct sidecar files. host = _host_from_url(source.url) or source.host or "unknown" - return f"git__{_sanitize_cache_name(host)}__{_sanitize_cache_name(source.name)}" + return f"{kind}__{_sanitize_cache_name(host)}__{_sanitize_cache_name(source.name)}" normalized_host = (source.host or "github.com").lower() if normalized_host == "github.com": return source.name @@ -567,6 +573,153 @@ def _fetch_git( raise MarketplaceFetchError(source.name, f"failed to read {file_path}: {exc}") from exc +# --------------------------------------------------------------------------- +# Network fetch -- Azure DevOps REST items API (fast path, git fallback) +# --------------------------------------------------------------------------- + + +class _AdoItemNotFound(Exception): + """Sentinel: the ADO items API returned a confirmed 404 for the path. + + Distinguishes "the file is definitively absent at this ref" (map to + ``None`` so ``_auto_detect_path`` can probe the next candidate) from a + transport/auth failure (fall back to the generic-git clone path). + """ + + +def _ado_auth_header(token: str | None, git_env: dict | None) -> dict[str, str]: + """Build the Azure DevOps ``Authorization`` header for a resolved token. + + ``AuthResolver.try_with_fallback`` hands the operation a ``(token, git_env)`` + pair but not the auth scheme. Bearer contexts carry the full + ``Authorization: Bearer `` header in ``GIT_CONFIG_VALUE_0`` (see + ``AuthResolver._build_git_env``); detect that and emit the Bearer scheme. + Otherwise treat the token as an ADO PAT and use HTTP Basic with + ``base64(":" + PAT)`` per ADO's convention (empty username, PAT as + password). Returns an empty dict for an anonymous request. + + The returned dict carries the credential -- callers MUST NOT log it. + """ + if not token: + return {} + extra_header = (git_env or {}).get("GIT_CONFIG_VALUE_0", "") + if extra_header.lower().startswith("authorization: bearer "): + return {"Authorization": f"Bearer {token}"} + encoded = base64.b64encode(f":{token}".encode()).decode("ascii") + return {"Authorization": f"Basic {encoded}"} + + +def _fetch_ado_rest( + source: MarketplaceSource, + file_path: str, + *, + org: str, + project: str, + repo: str, + host: str, + auth_resolver, +) -> dict | None: + """Read a single metadata file from Azure DevOps via the REST items API. + + Routes auth through ``AuthResolver.try_with_fallback`` for the ADO host so + a resolved PAT (``ADO_APM_PAT``) is tried first and an AAD bearer (``az``) + is the runtime fallback -- the same auth posture as the clone path. The + token is never logged. Raises on any failure (network, auth, non-JSON, + sign-in page) so the caller can fall back to the generic-git path; raises + ``_AdoItemNotFound`` for a confirmed 404. + """ + from ..utils.github_host import build_ado_api_url + + url = build_ado_api_url(org, project, repo, file_path, source.ref, host) + + def _do_fetch(token, git_env): + headers = {"User-Agent": "apm-cli"} + headers.update(_ado_auth_header(token, git_env)) + resp = _http_get(url, headers=headers, timeout=30) + if resp.status_code == 404: + # No message: this sentinel flows through ``try_with_fallback`` -> + # ``is_ado_auth_failure_signal(str(exc))``; an empty string never + # trips an auth-failure keyword, so a 404 never wastes a bearer + # retry. + raise _AdoItemNotFound + # ADO answers an unauthenticated/under-scoped request with HTTP 200 + + # an HTML sign-in page rather than a 401 (#1671). Treat that as an auth + # failure so try_with_fallback can attempt the AAD bearer before we + # give up and clone. The word "unauthorized" is load-bearing: it makes + # ``is_ado_auth_failure_signal(str(exc))`` match, which is the gate the + # PAT->bearer fallback checks (see AuthResolver._try_ado_bearer_fallback). + if resp.status_code == 200: + content_type = resp.headers.get("Content-Type", "").lower() + if "text/html" in content_type: + raise MarketplaceFetchError( + source.name, + "Azure DevOps returned a sign-in page (unauthorized: authentication required)", + ) + resp.raise_for_status() + return _parse_json_text(resp) + + return auth_resolver.try_with_fallback( + host, + _do_fetch, + org=org, + path=f"{org}/{project}/{repo}", + unauth_first=False, + ) + + +def _fetch_ado( + source: MarketplaceSource, + file_path: str, + *, + host_info, + auth_resolver, +) -> dict | None: + """Fetch marketplace.json from Azure DevOps, REST-first with git fallback. + + Optional latency optimisation over the generic-git path: ADO single-file + metadata reads go through ``GET .../_apis/git/repositories/{repo}/items`` + instead of a subprocess clone, matching the GitHub/GitLab fast path. + + Falls back to ``_fetch_git`` (the subprocess clone) on any REST/transport + failure or offline condition so there is no regression vs. the prior + behaviour. A confirmed 404 returns ``None`` (the file is absent at this + path) so ``_auto_detect_path`` can probe the next candidate without paying + for a clone that would also miss. + """ + from ..utils.github_host import parse_ado_repo_url + + parsed = parse_ado_repo_url(source.url) + if parsed is None: + # URL does not decompose into org/project/repo (unusual ADO shape) -- + # nothing to REST against, so use the generic-git path directly. + return _fetch_git(source, file_path, host_info=host_info, auth_resolver=auth_resolver) + + org, project, repo = parsed + host = host_info.host if host_info is not None else "dev.azure.com" + try: + return _fetch_ado_rest( + source, + file_path, + org=org, + project=project, + repo=repo, + host=host, + auth_resolver=auth_resolver, + ) + except _AdoItemNotFound: + return None + except Exception as exc: + # REST failed (network, auth exhausted, sign-in page, malformed JSON, + # 5xx, ...). Fall back to the clone path so offline/unusual repos keep + # working. str(exc) only -- never interpolate the response/headers. + logger.debug( + "ADO REST metadata fetch failed for '%s'; falling back to generic-git: %s", + source.name, + exc, + ) + return _fetch_git(source, file_path, host_info=host_info, auth_resolver=auth_resolver) + + # --------------------------------------------------------------------------- # Local fetch (filesystem path or file://) # --------------------------------------------------------------------------- @@ -720,6 +873,7 @@ def _fetch_local_direct_read( _FETCHERS: dict[str, Callable] = { "github": _fetch_github, "gitlab": _fetch_gitlab, + "ado": _fetch_ado, "git": _fetch_git, "local": _fetch_local, } @@ -837,9 +991,9 @@ def _fetch_file( host_info = None if kind in ("github", "gitlab"): host_info = AuthResolver.classify_host(source.host) - elif kind == "git": - # For generic git, classify the host extracted from the URL so ADO etc. - # get correctly-typed auth contexts. + elif kind in ("git", "ado"): + # For ADO and generic git, classify the host extracted from the URL so + # each gets a correctly-typed auth context (ADO PAT/bearer routing). host = _host_from_url(source.url) host_info = AuthResolver.classify_host(host) if host else None @@ -891,8 +1045,8 @@ def fetch_marketplace( ) -> MarketplaceManifest: """Fetch and parse a marketplace manifest. - Uses the JSON sidecar cache for ``kind in ("github", "gitlab", "url")``. - Generic-git fetches rely on ``GitCache`` + ``git ls-remote`` for + Uses the JSON sidecar cache for ``kind in ("github", "gitlab", "ado", + "url")``. Generic-git fetches rely on ``GitCache`` + ``git ls-remote`` for freshness; local fetches read directly without caching. Args: @@ -907,7 +1061,7 @@ def fetch_marketplace( MarketplaceFetchError: If fetch fails and no cache is available. """ cache_name = _cache_key(source) - use_sidecar_cache = source.kind in ("github", "gitlab", "url") + use_sidecar_cache = source.kind in ("github", "gitlab", "ado", "url") # Try fresh cache first (API kinds only) if use_sidecar_cache and not force_refresh: diff --git a/src/apm_cli/marketplace/models.py b/src/apm_cli/marketplace/models.py index 38dece172..95d8e40d5 100644 --- a/src/apm_cli/marketplace/models.py +++ b/src/apm_cli/marketplace/models.py @@ -181,14 +181,16 @@ def is_remote_manifest_url(self) -> bool: @property def kind(self) -> str: - """Derived source kind: ``local`` | ``url`` | ``github`` | ``gitlab`` | ``git``. + """Derived source kind: ``local`` | ``url`` | ``github`` | ``gitlab`` | ``ado`` | ``git``. Classification: - Local filesystem path or ``file://`` URI -> ``local`` - Direct remote marketplace.json URL (``path == ""``) -> ``url`` - Host classified by AuthResolver as github/ghe_cloud/ghes -> ``github`` - Host classified as gitlab -> ``gitlab`` - - Anything else (ado, generic, ssh to non-classified host) -> ``git`` + - Host classified as Azure DevOps -> ``ado`` (REST items fast path with + generic-git fallback; see ``marketplace.client._fetch_ado``) + - Anything else (generic, ssh to non-classified host) -> ``git`` """ if not self.url or _looks_like_local_path(self.url): return "local" @@ -205,6 +207,8 @@ def kind(self) -> str: return "github" if host_kind == "gitlab": return "gitlab" + if host_kind == "ado": + return "ado" return "git" @property diff --git a/src/apm_cli/marketplace/resolver.py b/src/apm_cli/marketplace/resolver.py index 6e0463a6d..7164b4226 100644 --- a/src/apm_cli/marketplace/resolver.py +++ b/src/apm_cli/marketplace/resolver.py @@ -246,8 +246,9 @@ def _source_needs_explicit_git_path(source: MarketplaceSource) -> bool: For URL-first sources, the ``kind`` derivation already encodes the routing decision: any host APM doesn't classify as github-family needs the explicit git+path canonical (mirrors the existing GitLab self-managed pattern), and - that now includes Azure DevOps and generic git hosts since their - ``marketplace.json`` is fetched via subprocess git instead of an API. + that includes Azure DevOps (``ado``) and generic git hosts. ADO metadata is + read via the REST items fast path with a subprocess-git fallback, but its + plugin sources still need the explicit git+path canonical like generic git. Local marketplaces handle relative sources via :func:`_resolve_local_relative_source` on the fast path and never reach this helper. @@ -255,7 +256,7 @@ def _source_needs_explicit_git_path(source: MarketplaceSource) -> bool: kind = source.kind if kind == "github": return False - if kind in ("gitlab", "git"): + if kind in ("gitlab", "git", "ado"): return True # Fall back to legacy host-based behaviour for any kind we don't recognise return _marketplace_host_needs_explicit_git_path(source.host) diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index 2ac42095e..a092e75e7 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -620,6 +620,65 @@ def build_ado_api_url( ) +def parse_ado_repo_url(url: str | None) -> tuple[str, str, str] | None: + """Decompose an Azure DevOps repo URL into ``(org, project, repo)``. + + Inverse of :func:`build_ado_https_clone_url` -- accepts the standard + ``_git`` clone shape on both ADO hostnames: + + - ``https://dev.azure.com/{org}/{project}/_git/{repo}`` (org in path) + - ``https://{org}.visualstudio.com/{project}/_git/{repo}`` (org in subdomain) + + A trailing ``.git`` and any segments after ``{repo}`` (virtual sub-paths) + are ignored. Path segments are percent-decoded so an encoded project name + (e.g. ``my%20project``) round-trips; callers that rebuild a URL (e.g. + :func:`build_ado_api_url`) re-encode as needed. + + Returns ``None`` when *url* is not an ADO host or does not contain the + ``_git`` marker, so callers can fall back to a generic code path rather + than guessing at a malformed decomposition. + """ + if not url: + return None + try: + parsed = urllib.parse.urlsplit(url) + except ValueError: + return None + hostname = parsed.hostname or "" + if not is_azure_devops_hostname(hostname): + return None + + path = parsed.path.strip("/") + if path.endswith(".git"): + path = path[: -len(".git")] + segments = [urllib.parse.unquote(s) for s in path.split("/") if s] + if "_git" not in segments: + return None + git_idx = segments.index("_git") + # repo is the segment immediately after the ``_git`` marker. + if git_idx + 1 >= len(segments): + return None + repo = segments[git_idx + 1] + before = segments[:git_idx] + + if is_visualstudio_legacy_hostname(hostname): + # Legacy ``*.visualstudio.com``: org lives in the subdomain, the path + # before ``_git`` is just the project. + org = hostname.split(".")[0] + if len(before) < 1: + return None + project = before[-1] + else: + # ``dev.azure.com``: org and project both precede ``_git``. + if len(before) < 2: + return None + org, project = before[0], before[1] + + if not (org and project and repo): + return None + return org, project, repo + + def is_artifactory_path(path_segments: list) -> bool: """Return True if path segments indicate a JFrog Artifactory VCS repository. diff --git a/tests/integration/test_marketplace_ado_rest.py b/tests/integration/test_marketplace_ado_rest.py new file mode 100644 index 000000000..2204618fd --- /dev/null +++ b/tests/integration/test_marketplace_ado_rest.py @@ -0,0 +1,169 @@ +"""Integration coverage for ADO marketplace metadata over the REST items API. + +Exercises the public ``fetch_marketplace`` path end-to-end for an Azure DevOps +source (``kind == "ado"``): + +- REST items API is used instead of a subprocess clone, with ``ADO_APM_PAT`` + routed as HTTP Basic. +- The result is served from the JSON sidecar cache on the second fetch + (parity with the GitLab REST fetcher; no second network call). +- A REST/transport failure falls back to the generic-git path with no + regression, and the parsed manifest is returned. + +Fetcher-level unit coverage lives in +``tests/unit/marketplace/test_client_ado.py``. +""" + +from __future__ import annotations + +import base64 +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from apm_cli.core.auth import AuthResolver +from apm_cli.core.token_manager import GitHubTokenManager +from apm_cli.marketplace import client as client_mod +from apm_cli.marketplace import registry +from apm_cli.marketplace.client import fetch_marketplace +from apm_cli.marketplace.models import MarketplaceSource + +_ADO_URL = "https://dev.azure.com/contoso/platform/_git/tools" +_MANIFEST = { + "name": "ado-mkt", + "owner": "contoso", + "plugins": [ + {"name": "tool-x", "source": "./tools/x", "version": "1.0.0"}, + ], +} + + +@pytest.fixture(autouse=True) +def _isolate(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + config_dir = str(tmp_path / ".apm") + Path(config_dir).mkdir() + monkeypatch.setattr("apm_cli.config.CONFIG_DIR", config_dir) + monkeypatch.setattr("apm_cli.config.CONFIG_FILE", str(tmp_path / ".apm" / "config.json")) + monkeypatch.setattr("apm_cli.config._config_cache", None) + monkeypatch.setattr(registry, "_registry_cache", None) + # Real resolve() must not block on the git-credential helper. + monkeypatch.setattr(GitHubTokenManager, "resolve_credential_from_git", lambda *a, **k: None) + + +def _fake_response(status_code: int, *, text: str = "", content_type: str = "application/json"): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.headers = {"Content-Type": content_type} + + def _raise_for_status(): + if status_code >= 400: + raise client_mod.requests.exceptions.HTTPError(f"HTTP {status_code}") + + resp.raise_for_status.side_effect = _raise_for_status + return resp + + +def test_fetch_marketplace_ado_uses_rest_and_then_sidecar_cache(monkeypatch) -> None: + monkeypatch.setenv("ADO_APM_PAT", "pat-real") + source = MarketplaceSource(name="ado-mkt", url=_ADO_URL, ref="main") + assert source.kind == "ado" + + calls: list[tuple[str, dict]] = [] + + def fake_get(url, headers=None, timeout=None): + calls.append((url, dict(headers or {}))) + return _fake_response(200, text=json.dumps(_MANIFEST)) + + with patch("apm_cli.marketplace.client._http_get", side_effect=fake_get): + resolver = AuthResolver() + first = fetch_marketplace(source, auth_resolver=resolver) + second = fetch_marketplace(source, auth_resolver=resolver) + + assert first.name == "ado-mkt" + assert first.find_plugin("tool-x") is not None + assert second.name == "ado-mkt" + # Exactly one network call: the second fetch is served from the sidecar cache. + assert len(calls) == 1 + url, headers = calls[0] + assert "/_apis/git/repositories/tools/items" in url + expected = base64.b64encode(b":pat-real").decode("ascii") + assert headers["Authorization"] == f"Basic {expected}" + assert "pat-real" not in url + + +def test_fetch_marketplace_ado_rest_failure_falls_back_to_git(monkeypatch) -> None: + monkeypatch.setenv("ADO_APM_PAT", "pat-real") + source = MarketplaceSource(name="ado-mkt", url=_ADO_URL, ref="main") + + no_bearer = MagicMock() + no_bearer.is_available.return_value = False + + def fake_get(url, headers=None, timeout=None): + # Simulate ADO returning a sign-in page (auth insufficient). + return _fake_response(200, text="sign in", content_type="text/html") + + with ( + patch("apm_cli.core.azure_cli.get_bearer_provider", return_value=no_bearer), + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + resolver = AuthResolver() + manifest = fetch_marketplace(source, auth_resolver=resolver) + + assert manifest.name == "ado-mkt" + assert manifest.find_plugin("tool-x") is not None + git_fallback.assert_called_once() + + +def test_fetch_marketplace_ado_offline_serves_stale_cache(monkeypatch) -> None: + """REST + git both fail on a later fetch -> stale sidecar cache is served.""" + monkeypatch.setenv("ADO_APM_PAT", "pat-real") + source = MarketplaceSource(name="ado-mkt", url=_ADO_URL, ref="main") + + # Warm the cache via a successful REST fetch. + with patch( + "apm_cli.marketplace.client._http_get", + side_effect=lambda *a, **k: _fake_response(200, text=json.dumps(_MANIFEST)), + ): + fetch_marketplace(source, auth_resolver=AuthResolver()) + + # Expire the cache, then make both REST and git fail; stale cache wins. + cache_name = client_mod._cache_key(source) + meta_path = client_mod._cache_meta_path(cache_name) + with open(meta_path) as f: + meta = json.load(f) + meta["fetched_at"] = 0 # force-expire + with open(meta_path, "w") as f: + json.dump(meta, f) + + no_bearer = MagicMock() + no_bearer.is_available.return_value = False + + def boom(*a, **k): + raise client_mod.requests.exceptions.ConnectionError("offline") + + with ( + patch("apm_cli.core.azure_cli.get_bearer_provider", return_value=no_bearer), + patch("apm_cli.marketplace.client._http_get", side_effect=boom), + patch( + "apm_cli.marketplace.client._fetch_git", + side_effect=client_mod.MarketplaceFetchError("ado-mkt", "git offline"), + ), + ): + manifest = fetch_marketplace(source, auth_resolver=AuthResolver()) + + assert manifest.name == "ado-mkt" + assert manifest.find_plugin("tool-x") is not None + + +def test_marketplace_source_ado_cache_key_distinct_from_git(monkeypatch) -> None: + """ADO and generic-git sidecar files never collide for the same host/name.""" + ado = MarketplaceSource(name="m", url="https://dev.azure.com/o/p/_git/r") + # A generic-git URL on a different host keeps a distinct, kind-prefixed key. + git = MarketplaceSource(name="m", url="https://gitea.example.com/o/r.git") + assert client_mod._cache_key(ado).startswith("ado__") + assert client_mod._cache_key(git).startswith("git__") + assert client_mod._cache_key(ado) != client_mod._cache_key(git) diff --git a/tests/unit/marketplace/test_client_ado.py b/tests/unit/marketplace/test_client_ado.py new file mode 100644 index 000000000..5fe0746fc --- /dev/null +++ b/tests/unit/marketplace/test_client_ado.py @@ -0,0 +1,413 @@ +"""Unit coverage for the ``_fetch_ado`` marketplace fetcher (REST items API). + +Coverage parity with the GitLab REST fetcher (``test_marketplace_client``): +- ADO single-file reads hit ``/_apis/git/repositories/.../items`` (no clone) +- ``ADO_APM_PAT`` -> HTTP Basic ``base64(":" + PAT)`` (auth-first) +- AAD bearer context -> ``Authorization: Bearer `` +- confirmed 404 -> ``None`` (no wasted clone; path auto-detection probes next) +- sign-in HTML / network failure -> generic-git fallback (no regression) +- non-decomposable ADO URL -> generic-git directly +- token is never embedded in the request URL +- ``_FETCHERS`` dispatch routes ``kind == "ado"`` to ``_fetch_ado`` +""" + +from __future__ import annotations + +import base64 +import json +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from apm_cli.core.auth import AuthResolver +from apm_cli.core.token_manager import GitHubTokenManager +from apm_cli.marketplace import client as client_mod +from apm_cli.marketplace.client import _ado_auth_header, _fetch_ado +from apm_cli.marketplace.errors import MarketplaceFetchError +from apm_cli.marketplace.models import MarketplaceSource + +_ADO_URL = "https://dev.azure.com/contoso/platform/_git/tools" +_MANIFEST = {"name": "acme", "plugins": []} + + +def _ado_source(url: str = _ADO_URL, name: str = "acme", ref: str = "main") -> MarketplaceSource: + return MarketplaceSource(name=name, url=url, ref=ref) + + +def _ado_host_info() -> SimpleNamespace: + return SimpleNamespace(host="dev.azure.com", kind="ado") + + +def _fake_response(status_code: int, *, text: str = "", content_type: str = "application/json"): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.headers = {"Content-Type": content_type} + + def _raise_for_status(): + if status_code >= 400: + raise client_mod.requests.exceptions.HTTPError(f"HTTP {status_code}") + + resp.raise_for_status.side_effect = _raise_for_status + return resp + + +class _FakeResolver: + """Single-attempt stand-in for ``AuthResolver.try_with_fallback``. + + Records routing kwargs and invokes the operation with a controllable + ``(token, git_env)`` pair so the fetcher's header derivation and dispatch + can be exercised without real credential resolution. + """ + + def __init__(self, token: str | None = None, git_env: dict | None = None): + self.token = token + self.git_env = git_env or {"GIT_TERMINAL_PROMPT": "0"} + self.calls: list[dict] = [] + + def try_with_fallback(self, host, operation, *, org=None, path=None, unauth_first=False): + self.calls.append({"host": host, "org": org, "path": path, "unauth_first": unauth_first}) + return operation(self.token, self.git_env) + + +# --------------------------------------------------------------------------- +# _ado_auth_header +# --------------------------------------------------------------------------- + + +def test_ado_auth_header_none_token_is_anonymous() -> None: + assert _ado_auth_header(None, {}) == {} + + +def test_ado_auth_header_pat_uses_basic() -> None: + headers = _ado_auth_header("pat-123", {"GIT_TOKEN": "pat-123"}) + expected = base64.b64encode(b":pat-123").decode("ascii") + assert headers == {"Authorization": f"Basic {expected}"} + + +def test_ado_auth_header_bearer_detected_from_git_env() -> None: + git_env = {"GIT_CONFIG_VALUE_0": "Authorization: Bearer jwt-xyz"} + assert _ado_auth_header("jwt-xyz", git_env) == {"Authorization": "Bearer jwt-xyz"} + + +# --------------------------------------------------------------------------- +# _fetch_ado -- REST fast path +# --------------------------------------------------------------------------- + + +def test_fetch_ado_uses_rest_items_api_not_clone() -> None: + captured: list[tuple[str, dict]] = [] + + def fake_get(url, headers=None, timeout=None): + captured.append((url, dict(headers or {}))) + return _fake_response(200, text=json.dumps(_MANIFEST)) + + resolver = _FakeResolver(token="pat-abc", git_env={"GIT_TOKEN": "pat-abc"}) + with patch("apm_cli.marketplace.client._http_get", side_effect=fake_get): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + assert len(captured) == 1 + url, headers = captured[0] + assert "/contoso/platform/_apis/git/repositories/tools/items" in url + assert "path=marketplace.json" in url + assert "versionDescriptor.version=main" in url + assert "api-version=" in url + # PAT routed as HTTP Basic, never embedded in the URL. + expected = base64.b64encode(b":pat-abc").decode("ascii") + assert headers["Authorization"] == f"Basic {expected}" + assert "pat-abc" not in url + # Auth routed for the ADO org/project/repo. + assert resolver.calls[0]["org"] == "contoso" + assert resolver.calls[0]["path"] == "contoso/platform/tools" + assert resolver.calls[0]["unauth_first"] is False + + +def test_fetch_ado_bearer_context_sends_bearer_header() -> None: + captured_headers: list[dict] = [] + + def fake_get(url, headers=None, timeout=None): + captured_headers.append(dict(headers or {})) + return _fake_response(200, text=json.dumps(_MANIFEST)) + + resolver = _FakeResolver( + token="jwt-xyz", + git_env={"GIT_CONFIG_VALUE_0": "Authorization: Bearer jwt-xyz"}, + ) + with patch("apm_cli.marketplace.client._http_get", side_effect=fake_get): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + assert captured_headers[0]["Authorization"] == "Bearer jwt-xyz" + + +def test_fetch_ado_404_returns_none_without_clone() -> None: + def fake_get(url, headers=None, timeout=None): + return _fake_response(404) + + resolver = _FakeResolver(token="pat-abc") + with ( + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git") as git_fallback, + ): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result is None + git_fallback.assert_not_called() + + +def test_fetch_ado_legacy_visualstudio_host() -> None: + captured: list[str] = [] + + def fake_get(url, headers=None, timeout=None): + captured.append(url) + return _fake_response(200, text=json.dumps(_MANIFEST)) + + src = _ado_source(url="https://contoso.visualstudio.com/platform/_git/tools") + resolver = _FakeResolver(token="pat-abc") + with patch("apm_cli.marketplace.client._http_get", side_effect=fake_get): + result = _fetch_ado( + src, + "marketplace.json", + host_info=SimpleNamespace(host="contoso.visualstudio.com", kind="ado"), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + # build_ado_api_url targets dev.azure.com-style path under the legacy host. + assert "contoso.visualstudio.com/contoso/platform/_apis/git/repositories/tools" in captured[0] + + +# --------------------------------------------------------------------------- +# _fetch_ado -- generic-git fallback (no regression) +# --------------------------------------------------------------------------- + + +def test_fetch_ado_signin_html_falls_back_to_git() -> None: + """ADO 200 + text/html sign-in page (#1671) -> generic-git clone.""" + + def fake_get(url, headers=None, timeout=None): + return _fake_response(200, text="sign in", content_type="text/html") + + resolver = _FakeResolver(token=None) # anonymous -> no bearer fallback + with ( + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + git_fallback.assert_called_once() + + +def test_fetch_ado_network_error_falls_back_to_git() -> None: + def fake_get(url, headers=None, timeout=None): + raise client_mod.requests.exceptions.ConnectionError("offline") + + resolver = _FakeResolver(token="pat-abc") + with ( + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + git_fallback.assert_called_once() + + +def test_fetch_ado_non_decomposable_url_uses_git_directly() -> None: + """A URL without org/project/repo cannot be REST-fetched -> clone path.""" + bad = _ado_source(url="https://dev.azure.com/contoso") # no _git marker + resolver = _FakeResolver(token="pat-abc") + with ( + patch("apm_cli.marketplace.client._http_get") as http_get, + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + result = _fetch_ado( + bad, + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + http_get.assert_not_called() + git_fallback.assert_called_once() + + +def test_fetch_ado_invalid_json_falls_back_to_git() -> None: + def fake_get(url, headers=None, timeout=None): + return _fake_response(200, text="not json{{") + + resolver = _FakeResolver(token="pat-abc") + with ( + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + git_fallback.assert_called_once() + + +# --------------------------------------------------------------------------- +# Dispatch + real AuthResolver wiring (parity with GitLab fetcher tests) +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_slow_git_credential(): + """Real ``resolve()`` must not block on the git-credential helper.""" + with patch.object(GitHubTokenManager, "resolve_credential_from_git", return_value=None): + yield + + +def test_fetch_file_dispatches_ado_kind_through_rest() -> None: + """End-to-end: ``_fetch_file`` routes an ADO source to the REST items API.""" + captured: list[tuple[str, dict]] = [] + + def fake_get(url, headers=None, timeout=None): + captured.append((url, dict(headers or {}))) + return _fake_response(200, text=json.dumps(_MANIFEST)) + + with ( + patch.dict(os.environ, {"ADO_APM_PAT": "pat-real"}, clear=False), + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + ): + resolver = AuthResolver() + result = client_mod._fetch_file(_ado_source(), "marketplace.json", auth_resolver=resolver) + + assert result == _MANIFEST + assert len(captured) == 1 + url, headers = captured[0] + assert "/_apis/git/repositories/tools/items" in url + expected = base64.b64encode(b":pat-real").decode("ascii") + assert headers["Authorization"] == f"Basic {expected}" + # Proxy/GitHub-only path must not be consulted for ADO. + assert "/repos/" not in url + assert "/api/v4/" not in url + + +def test_fetch_ado_rest_auth_failure_then_git_fallback_via_resolver() -> None: + """A non-404 HTTP error bubbles out of try_with_fallback -> git fallback.""" + + def fake_get(url, headers=None, timeout=None): + return _fake_response(403, text="forbidden") + + # Pin the AAD bearer provider to "unavailable" so the PAT 403 propagates + # deterministically instead of shelling out to a real ``az`` on the host. + no_bearer = MagicMock() + no_bearer.is_available.return_value = False + + with ( + patch.dict(os.environ, {"ADO_APM_PAT": "pat-real"}, clear=False), + patch("apm_cli.core.azure_cli.get_bearer_provider", return_value=no_bearer), + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + resolver = AuthResolver() + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=AuthResolver.classify_host("dev.azure.com"), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + git_fallback.assert_called_once() + + +def test_fetch_ado_signin_page_triggers_bearer_fallback_before_clone() -> None: + """A stale PAT hitting the #1671 sign-in page must retry with AAD bearer. + + Regression guard: the sign-in MarketplaceFetchError message must contain an + ``is_ado_auth_failure_signal`` keyword so ``try_with_fallback`` attempts the + AAD bearer before ``_fetch_ado`` degrades to a clone. With the bearer + available and accepted, the REST path succeeds and ``_fetch_git`` is never + called. + """ + bearer_provider = MagicMock() + bearer_provider.is_available.return_value = True + bearer_provider.get_bearer_token.return_value = "jwt-fallback" + + def fake_get(url, headers=None, timeout=None): + auth = (headers or {}).get("Authorization", "") + if auth.startswith("Bearer "): + # Bearer retry succeeds with the real manifest. + assert auth == "Bearer jwt-fallback" + return _fake_response(200, text=json.dumps(_MANIFEST)) + # The stale PAT (Basic) gets the HTML sign-in page. + return _fake_response(200, text="sign in", content_type="text/html") + + with ( + patch.dict(os.environ, {"ADO_APM_PAT": "stale-pat"}, clear=False), + patch("apm_cli.core.azure_cli.get_bearer_provider", return_value=bearer_provider), + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git") as git_fallback, + ): + resolver = AuthResolver() + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=AuthResolver.classify_host("dev.azure.com"), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + bearer_provider.get_bearer_token.assert_called_once() + git_fallback.assert_not_called() + + +def test_fetch_ado_rest_raises_is_swallowed_into_fallback() -> None: + """A MarketplaceFetchError raised inside the operation triggers fallback.""" + sentinel = MarketplaceFetchError("acme", "boom") + + def fake_get(url, headers=None, timeout=None): + raise sentinel + + resolver = _FakeResolver(token="pat-abc") + with ( + patch("apm_cli.marketplace.client._http_get", side_effect=fake_get), + patch("apm_cli.marketplace.client._fetch_git", return_value=_MANIFEST) as git_fallback, + ): + result = _fetch_ado( + _ado_source(), + "marketplace.json", + host_info=_ado_host_info(), + auth_resolver=resolver, + ) + + assert result == _MANIFEST + git_fallback.assert_called_once() diff --git a/tests/unit/marketplace/test_client_git.py b/tests/unit/marketplace/test_client_git.py index f5131ea40..f767d9efc 100644 --- a/tests/unit/marketplace/test_client_git.py +++ b/tests/unit/marketplace/test_client_git.py @@ -21,6 +21,7 @@ from apm_cli.marketplace.client import ( _FETCHERS, + _fetch_ado, _fetch_git, _fetch_github, _fetch_gitlab, @@ -75,7 +76,12 @@ def test_fetch_git_calls_gitcache_with_sparse_path( def test_fetch_git_ado_url_routes_via_subprocess( tmp_path: Path, fake_host_info, fake_auth_resolver ) -> None: - """ADO URLs go through ``GitCache`` -- not the REST API.""" + """``_fetch_git`` (the ADO REST fallback path) still clones via ``GitCache``. + + ADO marketplace reads now prefer ``_fetch_ado`` (REST items API); this test + pins the generic-git fallback that ``_fetch_ado`` delegates to on REST + failure -- it must keep building the auth env and cloning. + """ checkout = tmp_path / "checkout" checkout.mkdir() (checkout / "marketplace.json").write_text("{}") @@ -174,5 +180,6 @@ def test_fetchers_dispatch_table_routes_kinds_to_correct_callable() -> None: assert _FETCHERS["local"] is _fetch_local assert _FETCHERS["github"] is _fetch_github assert _FETCHERS["gitlab"] is _fetch_gitlab + assert _FETCHERS["ado"] is _fetch_ado # Defensive: no extra entries silently appearing. - assert set(_FETCHERS) == {"github", "gitlab", "git", "local"} + assert set(_FETCHERS) == {"github", "gitlab", "ado", "git", "local"} diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index f2b3485d4..b4920c3d4 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -310,6 +310,61 @@ def test_build_ado_api_url(): assert "api-version=7.0" in url +def test_parse_ado_repo_url_dev_azure(): + """dev.azure.com encodes the org as the first path segment.""" + assert github_host.parse_ado_repo_url("https://dev.azure.com/contoso/platform/_git/tools") == ( + "contoso", + "platform", + "tools", + ) + + +def test_parse_ado_repo_url_visualstudio_legacy(): + """*.visualstudio.com encodes the org in the subdomain.""" + assert github_host.parse_ado_repo_url( + "https://contoso.visualstudio.com/platform/_git/tools" + ) == ("contoso", "platform", "tools") + + +def test_parse_ado_repo_url_strips_git_suffix_and_subpath(): + assert github_host.parse_ado_repo_url( + "https://dev.azure.com/contoso/platform/_git/tools.git" + ) == ("contoso", "platform", "tools") + # Extra (virtual) segments after the repo are ignored. + assert github_host.parse_ado_repo_url( + "https://dev.azure.com/contoso/platform/_git/tools/sub/dir" + ) == ("contoso", "platform", "tools") + + +def test_parse_ado_repo_url_percent_decodes_segments(): + assert github_host.parse_ado_repo_url( + "https://dev.azure.com/contoso/my%20project/_git/tools" + ) == ("contoso", "my project", "tools") + + +def test_parse_ado_repo_url_round_trips_through_build_ado_api_url(): + org, project, repo = github_host.parse_ado_repo_url( + "https://dev.azure.com/contoso/platform/_git/tools" + ) + url = github_host.build_ado_api_url(org, project, repo, "marketplace.json", "main") + assert "/contoso/platform/_apis/git/repositories/tools/items" in url + + +@pytest.mark.parametrize( + "url", + [ + "https://github.com/owner/repo", # not an ADO host + "https://dev.azure.com/contoso/platform/tools", # missing _git marker + "https://dev.azure.com/contoso/_git/tools", # missing project segment + "https://dev.azure.com/contoso/platform/_git", # missing repo segment + "", + None, + ], +) +def test_parse_ado_repo_url_returns_none_for_non_ado_or_malformed(url): + assert github_host.parse_ado_repo_url(url) is None + + def test_build_authorization_header_git_env_bearer(): """Bearer scheme produces correct GIT_CONFIG_* env overlay.""" env = github_host.build_authorization_header_git_env("Bearer", "eyJabc.def.ghi")