From 0da78e0ef91d7e986756ee181dcf09638c6b6cae Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Wed, 24 Jun 2026 20:34:29 -0400 Subject: [PATCH 1/3] fix(input): support scp-style SSH Git URLs in host validation --- src/skillspector/input_handler.py | 10 +++++++ tests/unit/test_input_handler.py | 47 ++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/skillspector/input_handler.py b/src/skillspector/input_handler.py index b70d0f2..bc3d72e 100644 --- a/src/skillspector/input_handler.py +++ b/src/skillspector/input_handler.py @@ -29,6 +29,7 @@ from __future__ import annotations import ipaddress +import re import shutil import socket import subprocess @@ -160,6 +161,13 @@ def _is_file_url(self, path: str) -> bool: return False return not self._is_git_url(path) + def _extract_scp_host(self, url: str) -> str | None: + """Return the host from an scp-style Git URL, or None if not scp form.""" + if "://" in url: + return None + m = re.match(r"^[^@/]+@([^:/]+):.+$", url) + return m.group(1) if m else None + def _validate_url_host(self, url: str, allowed_hosts: frozenset[str]) -> str: """Validate URL host against allowlist and SSRF protections. @@ -167,6 +175,8 @@ def _validate_url_host(self, url: str, allowed_hosts: frozenset[str]) -> str: """ parsed = urlparse(url) host = parsed.hostname or "" + if not host: + host = self._extract_scp_host(url) or "" if not host: raise ValueError(f"URL has no valid hostname: {url}") if not any(host == allowed or host.endswith("." + allowed) for allowed in allowed_hosts): diff --git a/tests/unit/test_input_handler.py b/tests/unit/test_input_handler.py index 28167e6..f0c2c57 100644 --- a/tests/unit/test_input_handler.py +++ b/tests/unit/test_input_handler.py @@ -16,10 +16,11 @@ """Tests for skillspector input_handler (resolve directory, zip, single file).""" from pathlib import Path +from unittest.mock import MagicMock, patch import pytest -from skillspector.input_handler import InputHandler +from skillspector.input_handler import ALLOWED_GIT_HOSTS, InputHandler def test_resolve_directory(tmp_path: Path) -> None: @@ -94,3 +95,47 @@ def test_cleanup_idempotent(tmp_path: Path) -> None: handler.resolve(str(tmp_path / "a.md")) handler.cleanup() handler.cleanup() + + +def test_scp_url_is_git_url() -> None: + """scp-style SSH URL is recognised as a Git URL.""" + assert InputHandler()._is_git_url("git@github.com:org/repo.git") is True + + +def test_validate_url_host_scp_extracts_github() -> None: + """_validate_url_host extracts 'github.com' from an scp-style URL.""" + host = InputHandler()._validate_url_host("git@github.com:org/repo.git", ALLOWED_GIT_HOSTS) + assert host == "github.com" + + +def test_scp_valid_host_clones() -> None: + """resolve() calls git clone with the scp URL when the host is allowed.""" + handler = InputHandler() + try: + with patch( + "skillspector.input_handler.subprocess.run", return_value=MagicMock() + ) as mock_run: + handler.resolve("git@github.com:org/repo.git") + assert mock_run.called + call_args = mock_run.call_args[0][0] + assert "git@github.com:org/repo.git" in call_args + finally: + handler.cleanup() + + +def test_scp_disallowed_host_raises() -> None: + """_validate_url_host rejects an scp URL whose host is not in the allowlist.""" + with pytest.raises(ValueError, match="not in the allowed hosts"): + InputHandler()._validate_url_host("git@malicious.internal:org/repo.git", ALLOWED_GIT_HOSTS) + + +def test_scp_private_ip_raises() -> None: + """_validate_url_host rejects an scp URL with a link-local IP host.""" + with pytest.raises(ValueError): + InputHandler()._validate_url_host("git@169.254.169.254:org/repo.git", ALLOWED_GIT_HOSTS) + + +def test_https_url_unchanged() -> None: + """https URLs continue to extract the host via urlparse without hitting the scp fallback.""" + host = InputHandler()._validate_url_host("https://github.com/org/repo.git", ALLOWED_GIT_HOSTS) + assert host == "github.com" From aee44e30ab7bf8826fbd43c8065cf82779dd6aaf Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Wed, 24 Jun 2026 20:43:39 -0400 Subject: [PATCH 2/3] test(input): clarify scp_private_ip test covers allowlist gate Signed-off-by: Rod Boev --- tests/unit/test_input_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_input_handler.py b/tests/unit/test_input_handler.py index f0c2c57..d9f1106 100644 --- a/tests/unit/test_input_handler.py +++ b/tests/unit/test_input_handler.py @@ -130,7 +130,7 @@ def test_scp_disallowed_host_raises() -> None: def test_scp_private_ip_raises() -> None: - """_validate_url_host rejects an scp URL with a link-local IP host.""" + """_validate_url_host rejects an scp URL whose extracted host is not in the allowlist.""" with pytest.raises(ValueError): InputHandler()._validate_url_host("git@169.254.169.254:org/repo.git", ALLOWED_GIT_HOSTS) From 0657d8d179690d9ad16f37bd8b35f1d71f742ea0 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Wed, 24 Jun 2026 20:57:06 -0400 Subject: [PATCH 3/3] test(input): add SSRF gate coverage for scp-extracted hosts Signed-off-by: Rod Boev --- tests/unit/test_input_handler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_input_handler.py b/tests/unit/test_input_handler.py index d9f1106..f90507f 100644 --- a/tests/unit/test_input_handler.py +++ b/tests/unit/test_input_handler.py @@ -139,3 +139,10 @@ def test_https_url_unchanged() -> None: """https URLs continue to extract the host via urlparse without hitting the scp fallback.""" host = InputHandler()._validate_url_host("https://github.com/org/repo.git", ALLOWED_GIT_HOSTS) assert host == "github.com" + + +def test_scp_ssrf_gate_fires() -> None: + """SSRF gate raises ValueError for an scp URL whose host resolves to a private IP.""" + with patch("skillspector.input_handler._is_private_ip", return_value=True): + with pytest.raises(ValueError, match="private/internal IP"): + InputHandler()._validate_url_host("git@github.com:org/repo.git", ALLOWED_GIT_HOSTS)