Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion api/dataset_api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import os
import re
import sqlite3
from typing import Any, Dict, List, Optional

from fastapi import Depends, FastAPI, HTTPException, Query, Request, Security, status
from pydantic import BaseModel

from security.api_authentication import (
AuthenticationSystem,
PermissionLevel,
Expand All @@ -27,6 +29,8 @@
expires_in_days=365,
)

logger = logging.getLogger(__name__)

app = FastAPI(
title="Dataset Access API", description="API for accessing and querying datasets."
)
Expand All @@ -41,7 +45,9 @@ def validate_identifier(identifier: str) -> str:
This prevents SQL injection by disallowing special characters.
"""
if not re.match(r"^[a-zA-Z0-9_]+$", identifier):
raise HTTPException(status_code=400, detail=f"Invalid identifier format: {identifier}")
raise HTTPException(
status_code=400, detail=f"Invalid identifier format: {identifier}"
Comment on lines +48 to +49
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚨 issue (security): Avoid echoing the raw identifier back in the error detail to reduce potential information leakage and log noise.

The 400 response currently includes the full user-supplied identifier, which can be arbitrarily long or malformed, can contain control characters, and may end up in logs or UIs. Prefer a fixed error message like "Invalid identifier format" and, if necessary, log the raw identifier only on the server side.

Fix in Cursor

)
return identifier


Expand Down Expand Up @@ -178,6 +184,7 @@ async def list_datasets(
)
)
except sqlite3.Error:
logger.exception("Database error occurred while listing datasets")
raise HTTPException(status_code=500, detail="Database error occurred")
finally:
if conn:
Expand Down Expand Up @@ -235,6 +242,7 @@ async def get_dataset_metadata(
columns=columns,
)
except sqlite3.Error:
logger.exception("Database error occurred while getting dataset metadata")
raise HTTPException(status_code=500, detail="Database error occurred")
finally:
if conn:
Expand Down Expand Up @@ -315,6 +323,7 @@ async def query_dataset(
)

except sqlite3.Error:
logger.exception("Database error occurred while querying dataset")
raise HTTPException(status_code=500, detail="Database error occurred")
finally:
if conn:
Expand Down