From 1b21b0df7353b317cc3d317401a24713995f1668 Mon Sep 17 00:00:00 2001 From: Evo Date: Sun, 21 Jun 2026 05:17:35 +0800 Subject: [PATCH] fix(utils): normalize trailing-dot/case in code-hosting host matching code_hosting_utils lowercased hosts but did not strip the trailing root dot, and the git@ branches compared the raw host against un-normalized config domains. A valid trailing-dot FQDN (github.com.) or an uppercase github_domains entry was therefore silently not recognized as a code-hosting URL, mis-routing the accessor. Apply network_guard._normalize_host's rstrip('.').lower() canonicalization symmetrically across every host<->domain comparison (consistency with the just-merged #2689 Feishu host-match hardening). Normalization only widens matching to the correct canonical host; valid matches are unaffected. Adds regression tests. --- openviking/utils/code_hosting_utils.py | 39 ++++++++++++++++++-------- tests/test_code_hosting_utils.py | 34 ++++++++++++++++++++++ 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/openviking/utils/code_hosting_utils.py b/openviking/utils/code_hosting_utils.py index a9470787cb..3d56d8cad7 100644 --- a/openviking/utils/code_hosting_utils.py +++ b/openviking/utils/code_hosting_utils.py @@ -7,12 +7,27 @@ platforms like GitHub and GitLab. """ +from collections.abc import Iterable from typing import Optional from urllib.parse import ParseResult, parse_qs, unquote, urlparse from openviking_cli.utils.config import get_openviking_config +def _normalize_host(host: str) -> str: + """Normalize a host for case- and trailing-dot-insensitive matching. + + Mirrors :func:`openviking.utils.network_guard._normalize_host` so code-hosting + URL matching agrees with the egress allowlist: a fully-qualified trailing-dot + host (``github.com.``) and any letter casing resolve to the same canonical form. 
+ """ + return host.rstrip(".").lower() + + +def _normalize_domains(domains: Iterable[str]) -> set[str]: + return {_normalize_host(domain) for domain in domains} + + def _domain_matches(parsed: ParseResult, domains: list[str]) -> bool: """Return True when parsed URL host matches configured domains. @@ -25,8 +40,8 @@ def _domain_matches(parsed: ParseResult, domains: list[str]) -> bool: if not hostname: return False - normalized_domains = {domain.lower() for domain in domains} - host = hostname.lower() + normalized_domains = _normalize_domains(domains) + host = _normalize_host(hostname) candidates = {host} try: @@ -45,10 +60,10 @@ def _extract_host(url: str) -> str: rest = url[4:] if ":" not in rest: return "" - return rest.split(":", 1)[0].strip().lower() + return _normalize_host(rest.split(":", 1)[0].strip()) parsed = urlparse(url) - return (parsed.hostname or parsed.netloc or "").strip().lower() + return _normalize_host((parsed.hostname or parsed.netloc or "").strip()) def _get_all_domains() -> list[str]: @@ -122,10 +137,11 @@ def parse_code_hosting_url(url: str) -> Optional[str]: if ":" not in url[4:]: return None host_part, path_part = url[4:].split(":", 1) - if host_part not in all_domains: + host_part = _normalize_host(host_part) + if host_part not in _normalize_domains(all_domains): return None path_parts = [p for p in path_part.split("/") if p] - if host_part in _get_azure_devops_domains(): + if host_part in _normalize_domains(_get_azure_devops_domains()): azure_repo_parts = _extract_azure_devops_ssh_repo_parts(path_parts) if azure_repo_parts: return "/".join( @@ -183,7 +199,7 @@ def is_github_url(url: str) -> bool: True if the URL is a GitHub URL """ config = get_openviking_config() - return _extract_host(url) in config.code.github_domains + return _extract_host(url) in _normalize_domains(config.code.github_domains) def is_gitlab_url(url: str) -> bool: @@ -196,7 +212,7 @@ def is_gitlab_url(url: str) -> bool: True if the URL is a GitLab URL """ config = 
get_openviking_config() - return _extract_host(url) in config.code.gitlab_domains + return _extract_host(url) in _normalize_domains(config.code.gitlab_domains) def is_code_hosting_url(url: str) -> bool: @@ -215,7 +231,7 @@ def is_code_hosting_url(url: str) -> bool: if ":" not in url[4:]: return False host_part = url[4:].split(":", 1)[0] - return host_part in all_domains + return _normalize_host(host_part) in _normalize_domains(all_domains) return _domain_matches(urlparse(url), all_domains) @@ -264,7 +280,7 @@ def is_git_repo_url(url: str) -> bool: if path_parts and path_parts[-1].endswith(".git"): path_parts[-1] = path_parts[-1][:-4] - if _extract_host(url) in _get_azure_devops_domains(): + if _extract_host(url) in _normalize_domains(_get_azure_devops_domains()): azure_repo_parts = _extract_azure_devops_repo_parts(path_parts) if azure_repo_parts: if _is_azure_devops_browse_url(parsed.query): @@ -284,7 +300,8 @@ def is_git_repo_url(url: str) -> bool: "wiki", } if ( - _extract_host(url) in config.code.github_domains + config.code.gitlab_domains + _extract_host(url) + in _normalize_domains(config.code.github_domains + config.code.gitlab_domains) and len(path_parts) >= 3 and path_parts[2] in non_repo_paths ): diff --git a/tests/test_code_hosting_utils.py b/tests/test_code_hosting_utils.py index dfc8230709..838b0af2ee 100644 --- a/tests/test_code_hosting_utils.py +++ b/tests/test_code_hosting_utils.py @@ -290,3 +290,37 @@ def test_is_git_repo_url_unknown_domain(): def test_is_git_repo_url_single_segment(): assert is_git_repo_url("https://github.com/org") is False + + +# --- host normalization (trailing dot / case), consistency with #2689 + network_guard --- + + +def test_parse_code_hosting_url_https_trailing_dot(): + assert parse_code_hosting_url("https://github.com./org/repo") == "org/repo" + + +def test_parse_code_hosting_url_git_ssh_uppercase_host(): + assert parse_code_hosting_url("git@GitHub.com:org/repo.git") == "org/repo" + + +def 
test_is_github_url_trailing_dot(): + assert is_github_url("https://github.com./org/repo") is True + + +def test_is_code_hosting_url_trailing_dot(): + assert is_code_hosting_url("https://github.com./org/repo") is True + + +def test_is_github_url_uppercase_config_domain(): + def upper_cfg(): + return SimpleNamespace( + code=SimpleNamespace( + github_domains=["GITHUB.COM"], + gitlab_domains=[], + azure_devops_domains=[], + code_hosting_domains=["GITHUB.COM"], + ) + ) + + with patch.object(_module, "get_openviking_config", side_effect=upper_cfg): + assert is_github_url("https://github.com/org/repo") is True