Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/apm_cli/commands/marketplace/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ def _display_source_kind(kind: str, is_direct_url: bool) -> str:
labels = {
"github": "GitHub repository",
"gitlab": "GitLab repository",
"ado": "Azure DevOps repository",
"git": "generic git repository",
"local": "local filesystem path",
}
Expand Down
174 changes: 164 additions & 10 deletions src/apm_cli/marketplace/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

- ``github`` / ``gitlab`` -> host file API via ``_fetch_via_api`` (auth-routed
through ``AuthResolver.try_with_fallback`` and the JSON sidecar cache).
- ``git`` -> generic git URL (ADO, Gitea, self-hosted, etc.) via subprocess
- ``ado`` -> Azure DevOps REST items API (``_fetch_ado``, auth-routed through
``AuthResolver.try_with_fallback`` with the JSON sidecar cache), falling back
to the generic-git path on any REST/transport failure.
- ``git`` -> generic git URL (Gitea, self-hosted, etc.) via subprocess
through ``GitCache``; ``git ls-remote`` is the freshness check, no JSON
sidecar cache.
- ``local`` -> bare repo (``git --git-dir=... show <ref>:<file>``), working
Expand All @@ -17,6 +20,7 @@
``~/.apm/cache/marketplace/`` with a 1-hour TTL.
"""

import base64
import contextlib
import hashlib
import json
Expand Down Expand Up @@ -119,10 +123,12 @@ def _cache_key(source: MarketplaceSource) -> str:
return f"url__{hashlib.sha256(source.url.encode()).hexdigest()[:16]}"
if kind == "local":
return f"local__{_sanitize_cache_name(source.name)}"
if kind == "git":
# Generic git: include host so a.com/o/r vs b.com/o/r never collapse.
if kind in ("git", "ado"):
# Generic git / ADO: include host so a.com/o/r vs b.com/o/r never
# collapse, and prefix by kind so the same host on the two paths keeps
# distinct sidecar files.
host = _host_from_url(source.url) or source.host or "unknown"
return f"git__{_sanitize_cache_name(host)}__{_sanitize_cache_name(source.name)}"
return f"{kind}__{_sanitize_cache_name(host)}__{_sanitize_cache_name(source.name)}"
normalized_host = (source.host or "github.com").lower()
if normalized_host == "github.com":
return source.name
Expand Down Expand Up @@ -567,6 +573,153 @@ def _fetch_git(
raise MarketplaceFetchError(source.name, f"failed to read {file_path}: {exc}") from exc


# ---------------------------------------------------------------------------
# Network fetch -- Azure DevOps REST items API (fast path, git fallback)
# ---------------------------------------------------------------------------


class _AdoItemNotFound(Exception):
"""Sentinel: the ADO items API returned a confirmed 404 for the path.

Distinguishes "the file is definitively absent at this ref" (map to
``None`` so ``_auto_detect_path`` can probe the next candidate) from a
transport/auth failure (fall back to the generic-git clone path).
"""


def _ado_auth_header(token: str | None, git_env: dict | None) -> dict[str, str]:
"""Build the Azure DevOps ``Authorization`` header for a resolved token.

``AuthResolver.try_with_fallback`` hands the operation a ``(token, git_env)``
pair but not the auth scheme. Bearer contexts carry the full
``Authorization: Bearer <jwt>`` header in ``GIT_CONFIG_VALUE_0`` (see
``AuthResolver._build_git_env``); detect that and emit the Bearer scheme.
Otherwise treat the token as an ADO PAT and use HTTP Basic with
``base64(":" + PAT)`` per ADO's convention (empty username, PAT as
password). Returns an empty dict for an anonymous request.

The returned dict carries the credential -- callers MUST NOT log it.
"""
if not token:
return {}
extra_header = (git_env or {}).get("GIT_CONFIG_VALUE_0", "")
if extra_header.lower().startswith("authorization: bearer "):
return {"Authorization": f"Bearer {token}"}
encoded = base64.b64encode(f":{token}".encode()).decode("ascii")
return {"Authorization": f"Basic {encoded}"}


def _fetch_ado_rest(
source: MarketplaceSource,
file_path: str,
*,
org: str,
project: str,
repo: str,
host: str,
auth_resolver,
) -> dict | None:
"""Read a single metadata file from Azure DevOps via the REST items API.

Routes auth through ``AuthResolver.try_with_fallback`` for the ADO host so
a resolved PAT (``ADO_APM_PAT``) is tried first and an AAD bearer (``az``)
is the runtime fallback -- the same auth posture as the clone path. The
token is never logged. Raises on any failure (network, auth, non-JSON,
sign-in page) so the caller can fall back to the generic-git path; raises
``_AdoItemNotFound`` for a confirmed 404.
"""
from ..utils.github_host import build_ado_api_url

url = build_ado_api_url(org, project, repo, file_path, source.ref, host)

def _do_fetch(token, git_env):
headers = {"User-Agent": "apm-cli"}
headers.update(_ado_auth_header(token, git_env))
resp = _http_get(url, headers=headers, timeout=30)
if resp.status_code == 404:
# No message: this sentinel flows through ``try_with_fallback`` ->
# ``is_ado_auth_failure_signal(str(exc))``; an empty string never
# trips an auth-failure keyword, so a 404 never wastes a bearer
# retry.
raise _AdoItemNotFound
# ADO answers an unauthenticated/under-scoped request with HTTP 200 +
# an HTML sign-in page rather than a 401 (#1671). Treat that as an auth
# failure so try_with_fallback can attempt the AAD bearer before we
# give up and clone. The word "unauthorized" is load-bearing: it makes
# ``is_ado_auth_failure_signal(str(exc))`` match, which is the gate the
# PAT->bearer fallback checks (see AuthResolver._try_ado_bearer_fallback).
if resp.status_code == 200:
content_type = resp.headers.get("Content-Type", "").lower()
if "text/html" in content_type:
raise MarketplaceFetchError(
source.name,
"Azure DevOps returned a sign-in page (unauthorized: authentication required)",
)
resp.raise_for_status()
return _parse_json_text(resp)

return auth_resolver.try_with_fallback(
host,
_do_fetch,
org=org,
path=f"{org}/{project}/{repo}",
unauth_first=False,
)


def _fetch_ado(
source: MarketplaceSource,
file_path: str,
*,
host_info,
auth_resolver,
) -> dict | None:
"""Fetch marketplace.json from Azure DevOps, REST-first with git fallback.

Optional latency optimisation over the generic-git path: ADO single-file
metadata reads go through ``GET .../_apis/git/repositories/{repo}/items``
instead of a subprocess clone, matching the GitHub/GitLab fast path.

Falls back to ``_fetch_git`` (the subprocess clone) on any REST/transport
failure or offline condition so there is no regression vs. the prior
behaviour. A confirmed 404 returns ``None`` (the file is absent at this
path) so ``_auto_detect_path`` can probe the next candidate without paying
for a clone that would also miss.
"""
from ..utils.github_host import parse_ado_repo_url

parsed = parse_ado_repo_url(source.url)
if parsed is None:
# URL does not decompose into org/project/repo (unusual ADO shape) --
# nothing to REST against, so use the generic-git path directly.
return _fetch_git(source, file_path, host_info=host_info, auth_resolver=auth_resolver)

org, project, repo = parsed
host = host_info.host if host_info is not None else "dev.azure.com"
try:
return _fetch_ado_rest(
source,
file_path,
org=org,
project=project,
repo=repo,
host=host,
auth_resolver=auth_resolver,
)
except _AdoItemNotFound:
return None
except Exception as exc:
# REST failed (network, auth exhausted, sign-in page, malformed JSON,
# 5xx, ...). Fall back to the clone path so offline/unusual repos keep
# working. str(exc) only -- never interpolate the response/headers.
logger.debug(
"ADO REST metadata fetch failed for '%s'; falling back to generic-git: %s",
source.name,
exc,
)
return _fetch_git(source, file_path, host_info=host_info, auth_resolver=auth_resolver)


# ---------------------------------------------------------------------------
# Local fetch (filesystem path or file://)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -720,6 +873,7 @@ def _fetch_local_direct_read(
_FETCHERS: dict[str, Callable] = {
"github": _fetch_github,
"gitlab": _fetch_gitlab,
"ado": _fetch_ado,
"git": _fetch_git,
"local": _fetch_local,
}
Expand Down Expand Up @@ -837,9 +991,9 @@ def _fetch_file(
host_info = None
if kind in ("github", "gitlab"):
host_info = AuthResolver.classify_host(source.host)
elif kind == "git":
# For generic git, classify the host extracted from the URL so ADO etc.
# get correctly-typed auth contexts.
elif kind in ("git", "ado"):
# For ADO and generic git, classify the host extracted from the URL so
# each gets a correctly-typed auth context (ADO PAT/bearer routing).
host = _host_from_url(source.url)
host_info = AuthResolver.classify_host(host) if host else None

Expand Down Expand Up @@ -891,8 +1045,8 @@ def fetch_marketplace(
) -> MarketplaceManifest:
"""Fetch and parse a marketplace manifest.

Uses the JSON sidecar cache for ``kind in ("github", "gitlab", "url")``.
Generic-git fetches rely on ``GitCache`` + ``git ls-remote`` for
Uses the JSON sidecar cache for ``kind in ("github", "gitlab", "ado",
"url")``. Generic-git fetches rely on ``GitCache`` + ``git ls-remote`` for
freshness; local fetches read directly without caching.

Args:
Expand All @@ -907,7 +1061,7 @@ def fetch_marketplace(
MarketplaceFetchError: If fetch fails and no cache is available.
"""
cache_name = _cache_key(source)
use_sidecar_cache = source.kind in ("github", "gitlab", "url")
use_sidecar_cache = source.kind in ("github", "gitlab", "ado", "url")

# Try fresh cache first (API kinds only)
if use_sidecar_cache and not force_refresh:
Expand Down
8 changes: 6 additions & 2 deletions src/apm_cli/marketplace/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,16 @@ def is_remote_manifest_url(self) -> bool:

@property
def kind(self) -> str:
"""Derived source kind: ``local`` | ``url`` | ``github`` | ``gitlab`` | ``git``.
"""Derived source kind: ``local`` | ``url`` | ``github`` | ``gitlab`` | ``ado`` | ``git``.

Classification:
- Local filesystem path or ``file://`` URI -> ``local``
- Direct remote marketplace.json URL (``path == ""``) -> ``url``
- Host classified by AuthResolver as github/ghe_cloud/ghes -> ``github``
- Host classified as gitlab -> ``gitlab``
- Anything else (ado, generic, ssh to non-classified host) -> ``git``
- Host classified as Azure DevOps -> ``ado`` (REST items fast path with
generic-git fallback; see ``marketplace.client._fetch_ado``)
- Anything else (generic, ssh to non-classified host) -> ``git``
"""
if not self.url or _looks_like_local_path(self.url):
return "local"
Expand All @@ -205,6 +207,8 @@ def kind(self) -> str:
return "github"
if host_kind == "gitlab":
return "gitlab"
if host_kind == "ado":
return "ado"
return "git"

@property
Expand Down
7 changes: 4 additions & 3 deletions src/apm_cli/marketplace/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,16 +246,17 @@ def _source_needs_explicit_git_path(source: MarketplaceSource) -> bool:
For URL-first sources, the ``kind`` derivation already encodes the routing
decision: any host APM doesn't classify as github-family needs the explicit
git+path canonical (mirrors the existing GitLab self-managed pattern), and
that now includes Azure DevOps and generic git hosts since their
``marketplace.json`` is fetched via subprocess git instead of an API.
that includes Azure DevOps (``ado``) and generic git hosts. ADO metadata is
read via the REST items fast path with a subprocess-git fallback, but its
plugin sources still need the explicit git+path canonical like generic git.

Local marketplaces handle relative sources via :func:`_resolve_local_relative_source`
on the fast path and never reach this helper.
"""
kind = source.kind
if kind == "github":
return False
if kind in ("gitlab", "git"):
if kind in ("gitlab", "git", "ado"):
return True
# Fall back to legacy host-based behaviour for any kind we don't recognise
return _marketplace_host_needs_explicit_git_path(source.host)
Expand Down
59 changes: 59 additions & 0 deletions src/apm_cli/utils/github_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,65 @@ def build_ado_api_url(
)


def parse_ado_repo_url(url: str | None) -> tuple[str, str, str] | None:
"""Decompose an Azure DevOps repo URL into ``(org, project, repo)``.

Inverse of :func:`build_ado_https_clone_url` -- accepts the standard
``_git`` clone shape on both ADO hostnames:

- ``https://dev.azure.com/{org}/{project}/_git/{repo}`` (org in path)
- ``https://{org}.visualstudio.com/{project}/_git/{repo}`` (org in subdomain)

A trailing ``.git`` and any segments after ``{repo}`` (virtual sub-paths)
are ignored. Path segments are percent-decoded so an encoded project name
(e.g. ``my%20project``) round-trips; callers that rebuild a URL (e.g.
:func:`build_ado_api_url`) re-encode as needed.

Returns ``None`` when *url* is not an ADO host or does not contain the
``_git`` marker, so callers can fall back to a generic code path rather
than guessing at a malformed decomposition.
"""
if not url:
return None
try:
parsed = urllib.parse.urlsplit(url)
except ValueError:
return None
hostname = parsed.hostname or ""
if not is_azure_devops_hostname(hostname):
return None

path = parsed.path.strip("/")
if path.endswith(".git"):
path = path[: -len(".git")]
segments = [urllib.parse.unquote(s) for s in path.split("/") if s]
if "_git" not in segments:
return None
git_idx = segments.index("_git")
# repo is the segment immediately after the ``_git`` marker.
if git_idx + 1 >= len(segments):
return None
repo = segments[git_idx + 1]
before = segments[:git_idx]

if is_visualstudio_legacy_hostname(hostname):
# Legacy ``*.visualstudio.com``: org lives in the subdomain, the path
# before ``_git`` is just the project.
org = hostname.split(".")[0]
if len(before) < 1:
return None
project = before[-1]
else:
# ``dev.azure.com``: org and project both precede ``_git``.
if len(before) < 2:
return None
org, project = before[0], before[1]

if not (org and project and repo):
return None
return org, project, repo


def is_artifactory_path(path_segments: list) -> bool:
"""Return True if path segments indicate a JFrog Artifactory VCS repository.

Expand Down
Loading