diff --git a/.Jules/sentinel.md b/.Jules/sentinel.md
new file mode 100644
index 00000000..64a47e2f
--- /dev/null
+++ b/.Jules/sentinel.md
@@ -0,0 +1 @@
+## 2024-03-23 - Prevent Exception Details Leakage in API | Vulnerability: Information Leakage | Learning: Direct exposure of `str(e)` in `HTTPException` can leak sensitive internal paths and stack traces to clients. | Prevention: Always log detailed exception messages server-side and return generic error messages (e.g., 'Internal server error') to the client in generic exception handlers.
diff --git a/api/inference_server.py b/api/inference_server.py
index 08bd8ece..c74f8884 100644
--- a/api/inference_server.py
+++ b/api/inference_server.py
@@ -1,26 +1,30 @@
-import os
 import argparse
-from typing import List, Optional
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
+import os
+from typing import List
+
 import uvicorn
+from fastapi import FastAPI, HTTPException
 from llama_cpp import Llama
+from pydantic import BaseModel
 
 app = FastAPI(title="Pixelated Empathy EI Engine - Local Inference")
 
 # Global model instance
 model = None
 
+
 class ChatMessage(BaseModel):
     role: str
     content: str
 
+
 class ChatCompletionRequest(BaseModel):
     messages: List[ChatMessage]
     temperature: float = 0.7
     max_tokens: int = 512
     stream: bool = False
 
+
 @app.on_event("startup")
 def load_model():
     global model
@@ -28,6 +32,7 @@ def load_model():
 
     if not os.path.exists(model_path):
         import sys
+
         print(f"❌ CRITICAL ERROR: Model file not found at {model_path}")
         print("Please download the GGUF model from Modal before starting the server.")
         sys.exit(1)
@@ -38,14 +43,16 @@ def load_model():
             model_path=model_path,
             n_ctx=4096,
             n_threads=int(os.cpu_count() or 4),
-            n_gpu_layers=0
+            n_gpu_layers=0,
         )
         print("✅ Model loaded successfully.")
     except Exception as e:
         import sys
+
         print(f"❌ CRITICAL ERROR: Failed to load model: {e}")
         sys.exit(1)
 
+
 @app.post("/v1/chat/completions")
 def chat_completion(request: ChatCompletionRequest):
     # Defining as 'def' instead of 'async def' tells FastAPI
@@ -71,7 +78,7 @@ def chat_completion(request: ChatCompletionRequest):
             formatted_prompt,
             max_tokens=request.max_tokens,
             temperature=request.temperature,
-            stop=["[INST]", "", "<|endoftext|>"]
+            stop=["[INST]", "", "<|endoftext|>"],
         )
 
         # Structure as OpenAI-compatible response
@@ -85,20 +92,23 @@ def chat_completion(request: ChatCompletionRequest):
                     "index": 0,
                     "message": {
                         "role": "assistant",
-                        "content": response["choices"][0]["text"].strip()
+                        "content": response["choices"][0]["text"].strip(),
                     },
-                    "finish_reason": "stop"
+                    "finish_reason": "stop",
                 }
             ],
-            "usage": response["usage"]
+            "usage": response["usage"],
         }
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e)) from e
+        print(f"❌ Internal Error during chat completion: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
 
+
 @app.get("/health")
 async def health():
     return {"status": "ok", "model_loaded": model is not None}
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", default="0.0.0.0")
diff --git a/api/test_pixel_inference.py b/api/test_pixel_inference.py
index 870fd72c..b2983500 100644
--- a/api/test_pixel_inference.py
+++ b/api/test_pixel_inference.py
@@ -21,7 +21,7 @@
 # Add parent directories to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-from ai.api.pixel_inference_service import (
+from api.pixel_inference_service import (
     ConversationMessage,
     PixelInferenceEngine,
     PixelInferenceRequest,
diff --git a/uv.lock b/uv.lock
index a67e3221..fe1e3205 100644
--- a/uv.lock
+++ b/uv.lock
@@ -371,9 +371,8 @@ name = "bitsandbytes"
 version = "0.49.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
-    { name = "packaging", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+    { name = "numpy", marker = "sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+    { name = "packaging", marker = "sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
     { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
 ]
 wheels = [
@@ -2810,6 +2809,7 @@ dependencies = [
     { name = "sentence-transformers" },
     { name = "sentencepiece" },
     { name = "setuptools" },
+    { name = "starlette" },
     { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
     { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
     { name = "torchaudio", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
@@ -2887,6 +2887,7 @@ requires-dist = [
     { name = "sentence-transformers", specifier = ">=3.0.0" },
     { name = "sentencepiece", specifier = ">=0.1.99" },
     { name = "setuptools", specifier = ">=80.9.0" },
+    { name = "starlette", specifier = ">=0.40.0" },
     { name = "torch", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cpu" },
     { name = "torchaudio", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cpu" },
     { name = "torchvision", specifier = ">=0.23.0", index = "https://download.pytorch.org/whl/cpu" },
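
For context on the `.Jules/sentinel.md` entry above, here is a minimal standalone sketch of the leak-safe handler pattern it prescribes. The `logging` setup and the `run_inference` stub are illustrative assumptions, not part of the patch (the patched server itself writes server-side details via `print`):

```python
# Sketch of the pattern recorded in .Jules/sentinel.md: log full exception
# details server-side, return only a generic message to the client.
# `run_inference` is a hypothetical stand-in for the real llama_cpp call.
import logging

from fastapi import FastAPI, HTTPException

logger = logging.getLogger("inference")
app = FastAPI()


def run_inference() -> dict:
    # Simulates an internal failure whose str(e) would leak a filesystem path.
    raise RuntimeError("mmap failed: /srv/models/pixel-q4.gguf")


@app.post("/v1/chat/completions")
def chat_completion() -> dict:
    try:
        return run_inference()
    except Exception:
        # Message and traceback stay in the server logs only.
        logger.exception("Internal error during chat completion")
        # The client sees no internal paths, messages, or stack frames.
        raise HTTPException(status_code=500, detail="Internal server error")
```

Using `logger.exception` rather than `print` preserves the full traceback server-side, which the patched `print(f"... {e}")` variant does not; either way, the invariant is that `detail` sent to the client carries no exception internals.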