From c3ffbfd1b29a74a05045e418d7136798b0075f2a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 00:20:44 +0000 Subject: [PATCH 1/7] Initial plan From fc215e5adac73dff017afe9f9298dcb6ef9149cd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 00:24:31 +0000 Subject: [PATCH 2/7] Add URL validation to prevent SSRF attacks Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- main.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 544f2a1..53f1b43 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,9 @@ import logging import time import re +import ipaddress from typing import Dict, List, Optional, Any, Set, Sequence +from urllib.parse import urlparse import httpx from dotenv import load_dotenv @@ -99,11 +101,58 @@ def _api_client() -> httpx.Client: def validate_folder_url(url: str) -> bool: - """Validate that the folder URL is safe (HTTPS only).""" + """ + Validate that the folder URL is safe (HTTPS only, trusted domains). + Blocks SSRF attacks by allowing only trusted domains and blocking private IPs. + """ + # Check HTTPS protocol if not url.startswith("https://"): - log.warning(f"Skipping unsafe or invalid URL: {url}") + log.warning(f"Skipping unsafe or invalid URL (not HTTPS): {url}") + return False + + # Allowlist of trusted domains + ALLOWED_DOMAINS = { + "github.com", + "githubusercontent.com", + "raw.githubusercontent.com", + } + + try: + parsed = urlparse(url) + host = parsed.hostname + + if not host: + log.warning(f"Skipping invalid URL (no hostname): {url}") + return False + + # Check if host is in allowlist (including subdomains) + host_lower = host.lower() + is_allowed = False + for allowed_domain in ALLOWED_DOMAINS: + if host_lower == allowed_domain or host_lower.endswith(f".{allowed_domain}"): + is_allowed = True + break + + if not is_allowed: + log.warning(f"Skipping URL from untrusted domain: {url} (host: {host})") + return False + + # Additional safety: check if the hostname is an IP address + # If it is, ensure it's not a private IP + try: + ip = ipaddress.ip_address(host) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + log.warning(f"Skipping URL with private/internal IP address: {url}") + return False + except ValueError: + # Not an IP address, which is fine - it's a domain name + pass + + return True + + except Exception as e: + log.warning(f"Error validating URL {url}: {e}") return False - return True def validate_profile_id(profile_id: str) -> bool: From ec8432ab79d906a6bb87c813be278238e999e1c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 13 Dec 2025 00:25:36 +0000 Subject: [PATCH 3/7] Address code review feedback: fix validation order and remove redundant entry Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- main.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index 53f1b43..db22c1e 100644 --- a/main.py +++ b/main.py @@ -114,7 +114,6 @@ def validate_folder_url(url: str) -> bool: ALLOWED_DOMAINS = { "github.com", "githubusercontent.com", - "raw.githubusercontent.com", } try: @@ -125,6 +124,17 @@ def validate_folder_url(url: str) -> bool: log.warning(f"Skipping invalid URL (no hostname): {url}") return False + # First, check if the hostname is an IP address + # If it is, ensure it's not a private IP (reject all direct IP access) + try: + ip = ipaddress.ip_address(host) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + log.warning(f"Skipping URL with private/internal IP address: {url}") + return False + except ValueError: + # Not an IP address, it's a domain name - continue with domain validation + pass + # Check if host is in allowlist (including subdomains) host_lower = host.lower() is_allowed = False @@ -137,17 +147,6 @@ def validate_folder_url(url: str) -> bool: log.warning(f"Skipping URL from untrusted domain: {url} (host: {host})") return False - # Additional safety: check if the hostname is an IP address - # If it is, ensure it's not a private IP - try: - ip = ipaddress.ip_address(host) - if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: - log.warning(f"Skipping URL with private/internal IP address: {url}") - return False - except ValueError: - # Not an IP address, which is fine - it's a domain name - pass - return True except Exception as e: From f3db448e612cd21381b4427f88dd65a902205f67 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Fri, 12 Dec 2025 18:33:10 -0600 Subject: [PATCH 4/7] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index db22c1e..2fbc192 100644 --- a/main.py +++ b/main.py @@ -128,7 +128,14 @@ def validate_folder_url(url: str) -> bool: # If it is, ensure it's not a private IP (reject all direct IP access) try: ip = ipaddress.ip_address(host) - if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + or ip.is_unspecified + ): log.warning(f"Skipping URL with private/internal IP address: {url}") return False except ValueError: From 134541a97cfcc66928519028b691691124ce212f Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Fri, 12 Dec 2025 18:33:17 -0600 Subject: [PATCH 5/7] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- main.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 2fbc192..2d76515 100644 --- a/main.py +++ b/main.py @@ -144,11 +144,10 @@ def validate_folder_url(url: str) -> bool: # Check if host is in allowlist (including subdomains) host_lower = host.lower() - is_allowed = False - for allowed_domain in ALLOWED_DOMAINS: - if host_lower == allowed_domain or host_lower.endswith(f".{allowed_domain}"): - is_allowed = True - break + is_allowed = any( + host_lower == allowed_domain or host_lower.endswith(f".{allowed_domain}") + for allowed_domain in ALLOWED_DOMAINS + ) if not is_allowed: log.warning(f"Skipping URL from untrusted domain: {url} (host: {host})") From 16563d0c7630af98e728ddddc2c55765b8e791d8 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Fri, 12 Dec 2025 18:33:27 -0600 Subject: [PATCH 6/7] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- main.py | 61 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/main.py b/main.py index 2d76515..03e5c21 100644 --- a/main.py +++ b/main.py @@ -125,39 +125,38 @@ def validate_folder_url(url: str) -> bool: return False # First, check if the hostname is an IP address - # If it is, ensure it's not a private IP (reject all direct IP access) - try: - ip = ipaddress.ip_address(host) - if ( - ip.is_private - or ip.is_loopback - or ip.is_link_local - or ip.is_reserved - or ip.is_multicast - or ip.is_unspecified - ): - log.warning(f"Skipping URL with private/internal IP address: {url}") - return False - except ValueError: - # Not an IP address, it's a domain name - continue with domain validation - pass - - # Check if host is in allowlist (including subdomains) - host_lower = host.lower() - is_allowed = any( - host_lower == allowed_domain or host_lower.endswith(f".{allowed_domain}") - for allowed_domain in ALLOWED_DOMAINS - ) - - if not is_allowed: - log.warning(f"Skipping URL from untrusted domain: {url} (host: {host})") - return False - - return True - except Exception as e: - log.warning(f"Error validating URL {url}: {e}") + log.warning(f"Parsing error while validating URL {url}: {e}") + return False + + if not host: + log.warning(f"Skipping invalid URL (no hostname): {url}") return False + + # First, check if the hostname is an IP address + # If it is, ensure it's not a private IP (reject all direct IP access) + try: + ip = ipaddress.ip_address(host) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + log.warning(f"Skipping URL with private/internal IP address: {url}") + return False + except ValueError: + # Not an IP address, it's a domain name - continue with domain validation + pass + + # Check if host is in allowlist (including subdomains) + host_lower = host.lower() + is_allowed = False + for allowed_domain in ALLOWED_DOMAINS: + if host_lower == allowed_domain or host_lower.endswith(f".{allowed_domain}"): + is_allowed = True + break + + if not is_allowed: + log.warning(f"Skipping URL from untrusted domain: {url} (host: {host}) [rejected for security reasons]") + return False + + return True def validate_profile_id(profile_id: str) -> bool: From 6e7b61f55daf947f81b30233b77c9860f869d474 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Fri, 12 Dec 2025 18:33:36 -0600 Subject: [PATCH 7/7] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- main.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 03e5c21..94b1713 100644 --- a/main.py +++ b/main.py @@ -102,8 +102,22 @@ def _api_client() -> httpx.Client: def validate_folder_url(url: str) -> bool: """ - Validate that the folder URL is safe (HTTPS only, trusted domains). - Blocks SSRF attacks by allowing only trusted domains and blocking private IPs. + Validates that the folder URL is safe for use by enforcing strict security boundaries. + + Validation behavior: + 1. Only HTTPS URLs are allowed. + 2. Only URLs with hostnames matching the following domains (or their subdomains) are allowed: + - github.com + - githubusercontent.com + 3. URLs with direct IP addresses (both IPv4 and IPv6) are blocked. + 4. The following types of IP addresses are explicitly rejected: + - Private (e.g., 10.0.0.0/8, 192.168.0.0/16) + - Loopback (e.g., 127.0.0.1, ::1) + - Link-local (e.g., 169.254.0.0/16, fe80::/10) + - Reserved addresses + 5. Subdomain matching is supported for allowed domains (e.g., raw.githubusercontent.com is allowed). + + This helps prevent SSRF attacks by restricting access to trusted, public resources only. """ # Check HTTPS protocol if not url.startswith("https://"):