Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
## 2024-03-23 - Prevent Exception Details Leakage in API | Vulnerability: Information Leakage | Learning: Direct exposure of `str(e)` in `HTTPException` can leak sensitive internal paths and stack traces to clients. | Prevention: Always log detailed exception messages server-side and return generic error messages (e.g., 'Internal server error') to the client in generic exception handlers.
30 changes: 20 additions & 10 deletions api/inference_server.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
import os
import argparse
from typing import List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os
from typing import List

import uvicorn
from fastapi import FastAPI, HTTPException
from llama_cpp import Llama
from pydantic import BaseModel

app = FastAPI(title="Pixelated Empathy EI Engine - Local Inference")

# Global model instance
model = None


class ChatMessage(BaseModel):
    """One chat turn in the OpenAI chat-completions wire format."""

    role: str  # speaker role, e.g. "user"/"assistant" -- values not validated here; TODO confirm accepted set
    content: str  # message text for this turn


class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    messages: List[ChatMessage]  # conversation history; presumably oldest-first -- confirm against callers
    temperature: float = 0.7  # sampling temperature, forwarded to the llama_cpp model call
    max_tokens: int = 512  # generation cap, forwarded to the llama_cpp model call
    stream: bool = False  # NOTE(review): accepted but no streaming path is visible in this handler -- confirm


@app.on_event("startup")
def load_model():
global model
model_path = os.environ.get("MODEL_PATH", "pixelated-v1-wayfarer.Q4_K_M.gguf")

if not os.path.exists(model_path):
import sys

print(f"❌ CRITICAL ERROR: Model file not found at {model_path}")
print("Please download the GGUF model from Modal before starting the server.")
sys.exit(1)
Expand All @@ -38,14 +43,16 @@ def load_model():
model_path=model_path,
n_ctx=4096,
n_threads=int(os.cpu_count() or 4),
n_gpu_layers=0
n_gpu_layers=0,
)
print("✅ Model loaded successfully.")
except Exception as e:
import sys

print(f"❌ CRITICAL ERROR: Failed to load model: {e}")
sys.exit(1)


@app.post("/v1/chat/completions")
def chat_completion(request: ChatCompletionRequest):
# Defining as 'def' instead of 'async def' tells FastAPI
Expand All @@ -71,7 +78,7 @@ def chat_completion(request: ChatCompletionRequest):
formatted_prompt,
max_tokens=request.max_tokens,
temperature=request.temperature,
stop=["[INST]", "</s>", "<|endoftext|>"]
stop=["[INST]", "</s>", "<|endoftext|>"],
)

# Structure as OpenAI-compatible response
Expand All @@ -85,20 +92,23 @@ def chat_completion(request: ChatCompletionRequest):
"index": 0,
"message": {
"role": "assistant",
"content": response["choices"][0]["text"].strip()
"content": response["choices"][0]["text"].strip(),
},
"finish_reason": "stop"
"finish_reason": "stop",
}
],
"usage": response["usage"]
"usage": response["usage"],
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) from e
print(f"❌ Internal Error during chat completion: {e}")
raise HTTPException(status_code=500, detail="Internal server error")


@app.get("/health")
async def health():
    """Liveness probe: reports service status and whether the model global is populated."""
    loaded = model is not None
    return {"status": "ok", "model_loaded": loaded}


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", default="0.0.0.0")
Expand Down
2 changes: 1 addition & 1 deletion api/test_pixel_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Add parent directories to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from ai.api.pixel_inference_service import (
from api.pixel_inference_service import (
ConversationMessage,
PixelInferenceEngine,
PixelInferenceRequest,
Expand Down
7 changes: 4 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.