diff --git a/.env.example b/.env.example index ed38205b..d24dee8e 100644 --- a/.env.example +++ b/.env.example @@ -104,8 +104,11 @@ VIRUSTOTAL_API_KEY= # Application Configuration # ------------------------------------------------------------------------------ -# Storage paths (usually set by Docker/Railway automatically) +# Storage paths and upload limits (usually set by Docker/Railway automatically) EXTENSION_STORAGE_PATH=/app/extensions_storage +UPLOAD_MAX_BYTES=104857600 +ZIP_EXTRACT_MAX_FILES=10000 +ZIP_EXTRACT_MAX_UNCOMPRESSED_BYTES=524288000 # Only used when DB_BACKEND=sqlite (local file). Ignored when using Supabase. DATABASE_PATH=/app/data/extension-shield.db diff --git a/deploy/env.production.template b/deploy/env.production.template index 8eecb2f1..1a8b09bc 100644 --- a/deploy/env.production.template +++ b/deploy/env.production.template @@ -84,8 +84,11 @@ VIRUSTOTAL_API_KEY= # Application Configuration # ------------------------------------------------------------------------------ -# Storage paths (usually set by Docker/Railway automatically) +# Storage paths and upload limits (usually set by Docker/Railway automatically) EXTENSION_STORAGE_PATH=/app/extensions_storage +UPLOAD_MAX_BYTES=104857600 +ZIP_EXTRACT_MAX_FILES=10000 +ZIP_EXTRACT_MAX_UNCOMPRESSED_BYTES=524288000 DATABASE_PATH=/app/data/extension-shield.db # ------------------------------------------------------------------------------ diff --git a/deploy/railway.toml b/deploy/railway.toml index 9f25f150..0dd52f7e 100644 --- a/deploy/railway.toml +++ b/deploy/railway.toml @@ -46,6 +46,7 @@ PYTHONDONTWRITEBYTECODE = "1" PYTHONUNBUFFERED = "1" LLM_PROVIDER = "openai" EXTENSION_STORAGE_PATH = "/app/extensions_storage" +UPLOAD_MAX_BYTES = "104857600" [static] # Not used since we serve static files via FastAPI diff --git a/railway.toml b/railway.toml index 4e57ab17..9a770a1c 100644 --- a/railway.toml +++ b/railway.toml @@ -46,6 +46,7 @@ PYTHONDONTWRITEBYTECODE = "1" PYTHONUNBUFFERED = "1" LLM_PROVIDER = "openai" EXTENSION_STORAGE_PATH = "/app/extensions_storage" +UPLOAD_MAX_BYTES = "104857600" EXTSHIELD_MODE = "cloud" [static] diff --git a/src/extension_shield/api/main.py b/src/extension_shield/api/main.py index cbcd50c6..5cd3b13a 100644 --- a/src/extension_shield/api/main.py +++ b/src/extension_shield/api/main.py @@ -169,6 +169,36 @@ def _noop(func): return _noop +def _format_byte_limit(max_bytes: int) -> str: + """Return a human-readable upload limit for API error messages.""" + if max_bytes % (1024 * 1024) == 0: + mb = max_bytes // (1024 * 1024) + return f"{mb}MB" + return f"{max_bytes} bytes" + + +async def _read_upload_with_limit(file: UploadFile, max_bytes: int) -> bytes: + """Read an UploadFile without accepting payloads larger than max_bytes.""" + chunk_size = 1024 * 1024 + chunks = [] + total = 0 + + while True: + remaining = max_bytes - total + 1 + chunk = await file.read(min(chunk_size, remaining)) + if not chunk: + break + total += len(chunk) + if total > max_bytes: + raise HTTPException( + status_code=413, + detail=f"File too large. Maximum size is {_format_byte_limit(max_bytes)}.", + ) + chunks.append(chunk) + + return b"".join(chunks) + + @app.exception_handler(RateLimitExceeded) async def rate_limit_handler(request: Request, exc: RateLimitExceeded): return JSONResponse(status_code=429, content={"detail": "Rate limit exceeded"}) @@ -2450,14 +2480,9 @@ async def upload_and_scan( detail="Invalid file type. Only .crx and .zip files are supported" ) - # Validate file size (max 100MB) - max_size = 100 * 1024 * 1024 # 100MB - file_content = await file.read() - if len(file_content) > max_size: - raise HTTPException( - status_code=400, - detail=f"File too large. Maximum size is {max_size / (1024*1024):.0f}MB" - ) + # Validate file size before saving/analysis. Read with a bounded loop rather than + # slurping an arbitrary body into memory first. + file_content = await _read_upload_with_limit(file, settings.upload_max_bytes) # Validate MIME type (additional security check) import mimetypes diff --git a/src/extension_shield/core/config.py b/src/extension_shield/core/config.py index 4e94c7f3..c3c357ca 100644 --- a/src/extension_shield/core/config.py +++ b/src/extension_shield/core/config.py @@ -22,6 +22,8 @@ StorageBackend = Literal["local", "supabase"] DbBackend = Literal["sqlite", "postgres", "supabase"] +DEFAULT_UPLOAD_MAX_BYTES = 104857600 # 100 MiB + def _normalize_env(value: Optional[str]) -> Optional[str]: if value is None: @@ -122,7 +124,8 @@ class Settings: admin_api_key: Optional[str] telemetry_admin_key: Optional[str] - # Zip extract limits (zip-bomb protection) + # Upload and zip extract limits (DoS/zip-bomb protection) + upload_max_bytes: int zip_extract_max_files: int zip_extract_max_uncompressed_bytes: int @@ -180,6 +183,8 @@ def get_settings() -> Settings: - ADMIN_API_KEY, TELEMETRY_ADMIN_KEY (optional, for admin endpoints) - STORAGE_BACKEND: local|supabase (currently only local FS is used) - DB_BACKEND: sqlite|supabase (Postgres via Supabase; sqlite is dev fallback) + - UPLOAD_MAX_BYTES: max accepted CRX/ZIP upload size before request rejection + - ZIP_EXTRACT_MAX_FILES, ZIP_EXTRACT_MAX_UNCOMPRESSED_BYTES: zip-bomb protection """ env = _parse_env_name( @@ -207,7 +212,16 @@ def get_settings() -> Settings: admin_api_key = os.environ.get("ADMIN_API_KEY") telemetry_admin_key = os.environ.get("TELEMETRY_ADMIN_KEY") - # Zip-bomb protection: max file count and max total uncompressed size + # Upload and zip-bomb protection: reject large uploads before saving/analysis, + # then bound extracted archive complexity separately. + _upload_max_bytes = os.environ.get("UPLOAD_MAX_BYTES", str(DEFAULT_UPLOAD_MAX_BYTES)) + try: + upload_max_bytes = int(_upload_max_bytes) + except ValueError: + upload_max_bytes = DEFAULT_UPLOAD_MAX_BYTES + if upload_max_bytes <= 0: + upload_max_bytes = DEFAULT_UPLOAD_MAX_BYTES + _zip_max_files = os.environ.get("ZIP_EXTRACT_MAX_FILES", "10000") _zip_max_bytes = os.environ.get("ZIP_EXTRACT_MAX_UNCOMPRESSED_BYTES", "524288000") # 500 MiB try: @@ -256,6 +270,7 @@ def get_settings() -> Settings: supabase_jwt_aud=supabase_jwt_aud, admin_api_key=admin_api_key, telemetry_admin_key=telemetry_admin_key, + upload_max_bytes=upload_max_bytes, zip_extract_max_files=zip_extract_max_files, zip_extract_max_uncompressed_bytes=zip_extract_max_uncompressed_bytes, ) diff --git a/tests/api/test_upload_limits.py b/tests/api/test_upload_limits.py new file mode 100644 index 00000000..b90efa23 --- /dev/null +++ b/tests/api/test_upload_limits.py @@ -0,0 +1,60 @@ +import asyncio +from unittest.mock import MagicMock, patch + +from fastapi.testclient import TestClient + +from extension_shield.api.main import _read_upload_with_limit, app +from extension_shield.core.config import get_settings + + +class _AsyncChunkFile: + def __init__(self, payload: bytes): + self._payload = payload + self._offset = 0 + + async def read(self, size: int = -1) -> bytes: + if self._offset >= len(self._payload): + return b"" + if size is None or size < 0: + size = len(self._payload) - self._offset + chunk = self._payload[self._offset:self._offset + size] + self._offset += len(chunk) + return chunk + + +def _settings(upload_max_bytes: int) -> MagicMock: + settings = MagicMock() + settings.is_prod.return_value = False + settings.upload_max_bytes = upload_max_bytes + return settings + + +def test_upload_rejects_payload_above_configured_limit(tmp_path): + """Uploaded CRX/ZIP payloads larger than the configured limit are rejected before saving.""" + client = TestClient(app) + + with patch("extension_shield.api.main.get_settings", return_value=_settings(4)), \ + patch("extension_shield.api.main.RESULTS_DIR", tmp_path), \ + patch("extension_shield.api.main.run_analysis_workflow") as run_analysis: + response = client.post( + "/api/scan/upload", + files={"file": ("extension.zip", b"PK\x03\x04X", "application/zip")}, + ) + + assert response.status_code == 413 + assert response.json()["detail"] == "File too large. Maximum size is 4 bytes." + assert list(tmp_path.iterdir()) == [] + run_analysis.assert_not_called() + + +def test_bounded_reader_accepts_payload_at_configured_limit(): + """Payloads exactly at the configured byte limit are accepted by the bounded reader.""" + result = asyncio.run(_read_upload_with_limit(_AsyncChunkFile(b"1234"), 4)) + + assert result == b"1234" + + +def test_invalid_upload_limit_env_falls_back_to_default(monkeypatch): + monkeypatch.setenv("UPLOAD_MAX_BYTES", "not-an-int") + + assert get_settings().upload_max_bytes == 104_857_600