Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 49 additions & 18 deletions src/extension_shield/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@
# Initialize logger
logger = logging.getLogger(__name__)


def _parse_trusted_proxy_hosts() -> list[str]:
    """Return the explicit proxy hosts allowed to send forwarded headers.

    Reads the comma-separated ``TRUSTED_PROXY_HOSTS`` environment variable,
    trimming whitespace around each entry and discarding empty entries.

    Returns:
        The configured hosts, in the order they were listed. When the
        variable is unset, blank, or contains nothing but separators and
        whitespace, the loopback defaults ``["127.0.0.1", "localhost", "::1"]``
        are returned instead.
    """
    configured = [
        host.strip()
        for host in os.getenv("TRUSTED_PROXY_HOSTS", "").split(",")
        if host.strip()
    ]
    # A value such as "," or " , " parses to nothing; treat it the same as an
    # unset variable instead of handing the caller an empty host list.
    return configured or ["127.0.0.1", "localhost", "::1"]

# Import safe JSON utilities from shared module
from extension_shield.utils.json_encoder import (
safe_json_dumps,
Expand Down Expand Up @@ -361,8 +369,9 @@ async def add_security_headers(request: Request, call_next):
print(f"✅ CSP: Production mode detected (STATIC_DIR={STATIC_DIR}, index.html exists)")
app.add_middleware(CSPMiddleware, is_dev=_is_dev)

# Trust X-Forwarded-Proto / X-Forwarded-For from Railway/Cloudflare so request.url.scheme is correct
app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*")
# Trust forwarded headers only from explicitly allowed proxy hosts.
# Set TRUSTED_PROXY_HOSTS to your actual reverse proxy / CDN hop(s).
app.add_middleware(ProxyHeadersMiddleware, trusted_hosts=_parse_trusted_proxy_hosts())
Comment on lines +372 to +374
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR introduces a security-relevant change to proxy header handling that is not mentioned in the PR description. With the default TRUSTED_PROXY_HOSTS (localhost-only), forwarded headers will be ignored in typical prod deployments behind a reverse proxy/CDN, which can break client scheme/IP detection and collapse rate limiting onto the proxy IP. Please either (a) document this in the PR description and ensure deployment config sets TRUSTED_PROXY_HOSTS appropriately, or (b) gate this behavior (e.g., require TRUSTED_PROXY_HOSTS in prod / keep prior behavior until configured).

Copilot uses AI. Check for mistakes.

# In-memory state lives in shared.py; import references here so existing
# code in this file (and tests) can continue using module-level names.
Expand Down Expand Up @@ -408,20 +417,9 @@ def _get_client_ip(request: Request) -> str:
"""
Get the client's IP address for rate limiting anonymous users.

Handles proxied requests via X-Forwarded-For and X-Real-IP headers.
Falls back to client host if no headers present.
Relies on ProxyHeadersMiddleware to rewrite request.client only when the
request came from a trusted proxy host.
"""
# Check X-Forwarded-For header (from reverse proxy/load balancer)
x_forwarded_for = request.headers.get("x-forwarded-for")
if x_forwarded_for:
# Take the first IP (original client)
return x_forwarded_for.split(",")[0].strip()

# Check X-Real-IP header (from nginx)
x_real_ip = request.headers.get("x-real-ip")
if x_real_ip:
return x_real_ip.strip()

# Fall back to direct client IP
if request.client:
return request.client.host
Expand Down Expand Up @@ -504,6 +502,27 @@ def _require_admin_or_telemetry_key(request: Request) -> None:
)


def _require_private_scan_artifact_access(
    request: Request,
    extension_id: str,
    payload: Optional[Dict[str, Any]] = None,
) -> None:
    """Reject requests for a private scan's artifacts from anyone but its owner.

    Args:
        request: Incoming request; the requester is read from
            ``request.state.user_id`` when that attribute exists.
        extension_id: Key used to look up the scan in the module-level
            ``scan_source`` / ``scan_user_ids`` mappings.
        payload: Optional scan-result dict. When it is a dict, privacy is
            decided from its ``visibility`` / ``source`` fields and the owner
            from its ``user_id`` field (falling back to ``scan_user_ids``).
            Otherwise only the module-level mappings are consulted.

    Raises:
        HTTPException: 404 when the scan is private and the requester is
            missing, the owner is unknown, or the two do not match.
    """
    state = getattr(request, "state", None)
    requester_id = getattr(state, "user_id", None)

    if not isinstance(payload, dict):
        # No usable payload: rely on the in-memory bookkeeping alone.
        is_private = scan_source.get(extension_id) == "upload"
        owner_id = scan_user_ids.get(extension_id)
    else:
        marked_private = payload.get("visibility") == "private"
        is_private = marked_private or payload.get("source") == "upload"
        owner_id = payload.get("user_id") or scan_user_ids.get(extension_id)

    # Non-private scans are readable by anyone.
    if not is_private:
        return

    # Both identities must be known and identical for access to be granted.
    if requester_id and owner_id and requester_id == owner_id:
        return

    raise HTTPException(status_code=404, detail="Scan results not found")


def _deep_scan_limit_status(rate_limit_key: str) -> Dict[str, Any]:
"""Get deep scan limit status. Returns unlimited in local/dev environments.
Anonymous (IP-based) users get 1 scan per day; authenticated users get 3.
Expand Down Expand Up @@ -2888,7 +2907,7 @@ async def batch_scan_status(req: BatchStatusRequest, request: Request):


@app.get("/api/scan/enforcement_bundle/{extension_id}")
async def get_enforcement_bundle(extension_id: str):
async def get_enforcement_bundle(extension_id: str, http_request: Request):
"""
Get the governance enforcement bundle for an analyzed extension.

Expand Down Expand Up @@ -2926,6 +2945,8 @@ async def get_enforcement_bundle(extension_id: str):

if not results:
raise HTTPException(status_code=404, detail="Scan results not found")

_require_private_scan_artifact_access(http_request, extension_id, results)

# Check if governance analysis was run
governance_bundle = results.get("governance_bundle")
Expand Down Expand Up @@ -2959,7 +2980,7 @@ async def get_enforcement_bundle(extension_id: str):


@app.get("/api/scan/report/{extension_id}")
async def generate_pdf_report(extension_id: str) -> Response:
async def generate_pdf_report(extension_id: str, http_request: Request) -> Response:
"""
Generate a PDF security report for an analyzed extension.

Expand Down Expand Up @@ -2989,6 +3010,8 @@ async def generate_pdf_report(extension_id: str) -> Response:
if not results:
raise HTTPException(status_code=404, detail="Scan results not found")

_require_private_scan_artifact_access(http_request, extension_id, results)

# Generate PDF report
try:
report_generator = ReportGenerator()
Expand Down Expand Up @@ -3038,6 +3061,8 @@ async def get_file_list(extension_id: str, http_request: Request) -> FileListRes
if not results:
raise HTTPException(status_code=404, detail="Extension not found")

_require_private_scan_artifact_access(http_request, extension_id, results)

Comment on lines 3063 to +3065
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new private-artifact guard returns 404 on owner mismatch, but this handler still has an earlier authorization branch that raises 403 when scan_user_ids[extension_id] is set to a different user. That means authenticated non-owners can still get a 403 (existence leak) instead of the intended 404 for private artifacts. Consider removing that earlier check, or restricting it strictly to in-progress scans and aligning the response behavior with _require_private_scan_artifact_access.

Copilot uses AI. Check for mistakes.
extracted_path = results.get("extracted_path")
if not extracted_path or not os.path.exists(extracted_path):
raise HTTPException(status_code=404, detail="Extracted files not found")
Expand Down Expand Up @@ -3069,6 +3094,8 @@ async def get_file_content(extension_id: str, file_path: str, http_request: Requ
if not results:
raise HTTPException(status_code=404, detail="Extension not found")

_require_private_scan_artifact_access(http_request, extension_id, results)

Comment on lines 3095 to +3098
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as /api/scan/files: this endpoint now enforces 404 via _require_private_scan_artifact_access, but an earlier branch can still return 403 for authenticated non-owners based on scan_user_ids, which leaks existence for private uploads. Please align the earlier check with the new 404-on-mismatch behavior (or limit it to true in-progress cases).

Copilot uses AI. Check for mistakes.
extracted_path = results.get("extracted_path")
if not extracted_path:
raise HTTPException(status_code=404, detail="Extracted files not found")
Expand Down Expand Up @@ -3824,7 +3851,7 @@ async def database_health_check(request: Request):


@app.get("/api/scan/icon/{extension_id}")
async def get_extension_icon(extension_id: str):
async def get_extension_icon(extension_id: str, http_request: Request):
"""
Get extension icon from the extracted extension folder.
Uses icon_path from storage when available, and falls back to persisted icon bytes.
Expand All @@ -3850,6 +3877,9 @@ async def get_extension_icon(extension_id: str):
icon_media_type = results.get("icon_media_type")
else:
db_icon_record = _load_icon_record_from_db(extension_id)
results = db.get_scan_result(extension_id)
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds a second DB lookup (db.get_scan_result) immediately after _load_icon_record_from_db, which already queries scan_results for Supabase and SQLite paths. Consider extending _load_icon_record_from_db to also return the minimal auth fields needed (visibility/user_id/source) so this endpoint can do a single DB fetch before calling _require_private_scan_artifact_access.

Suggested change
results = db.get_scan_result(extension_id)
results = db_icon_record

Copilot uses AI. Check for mistakes.
if results:
scan_results[extension_id] = results
extracted_path = db_icon_record.get("extracted_path")
icon_path = db_icon_record.get("icon_path")
icon_base64 = db_icon_record.get("icon_base64")
Expand Down Expand Up @@ -3893,6 +3923,7 @@ async def get_extension_icon(extension_id: str):

# Best practice: if we have a persisted icon blob, serve it immediately.
# This avoids relying on filesystem state (ephemeral/persistent) and prevents slow fallbacks.
_require_private_scan_artifact_access(http_request, extension_id, results)
persisted = _extension_icon_response_from_base64(icon_base64, icon_media_type)
if persisted:
return persisted
Expand Down
32 changes: 32 additions & 0 deletions tests/api/test_enforcement_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,38 @@ def test_get_enforcement_bundle_not_found(self, client):

assert response.status_code == 404
assert "not found" in response.json()["detail"].lower()

def test_private_scan_artifact_requires_owner(self, client, tmp_path):
"""Private scan artifacts should not be readable without the owning user."""
ext_id = "privateartifact1234567890123456"
extracted = tmp_path / ext_id
extracted.mkdir()
Comment on lines +122 to +126
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test only asserts that unauthenticated access returns 404, but it doesn't assert that the owner can successfully retrieve the artifacts. Adding a positive-path assertion (e.g., setting an auth context/user_id for the owner and expecting 200/valid responses) would better protect against accidentally blocking legitimate access.

Copilot uses AI. Check for mistakes.

scan_results[ext_id] = {
"extension_id": ext_id,
"extension_name": "Private Extension",
"status": "completed",
"visibility": "private",
"user_id": "owner-user-1",
"governance_bundle": {"decision": {"verdict": "ALLOW"}},
"extracted_path": str(extracted),
}

try:
endpoints = [
f"/api/scan/enforcement_bundle/{ext_id}",
f"/api/scan/report/{ext_id}",
f"/api/scan/files/{ext_id}",
f"/api/scan/file/{ext_id}/manifest.json",
f"/api/scan/icon/{ext_id}",
]
Comment on lines +139 to +145
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the file-content endpoint, this test requests manifest.json but never creates that file under extracted_path. That means the endpoint could return 404 due to a missing file even if the ownership guard were absent, weakening the test’s ability to prove authorization is enforced. Consider creating a small manifest.json in the temp extracted dir (or choosing an endpoint that would otherwise return 200) so the test fails if the guard regresses.

Copilot uses AI. Check for mistakes.

for endpoint in endpoints:
response = client.get(endpoint)
assert response.status_code == 404
assert "not found" in response.json()["detail"].lower()
finally:
scan_results.pop(ext_id, None)

def test_get_enforcement_bundle_no_governance_data(self, client):
"""Test 404 when governance bundle not available."""
Expand Down
Loading