Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 139 additions & 15 deletions app_python/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,75 @@
import socket
import platform
import logging
import sys
from datetime import datetime, timezone
import time

from fastapi.responses import JSONResponse, Response
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from pythonjsonlogger import jsonlogger
import uvicorn


# Network binding, overridable through the environment.
HOST = os.getenv("HOST", "0.0.0.0")
# os.getenv returns strings, so the fallback is given as a string as well.
PORT = int(os.getenv("PORT", "5000"))

# Configure JSON logging.
# A dedicated named logger is configured here instead of calling
# logging.basicConfig(); it writes JSON records to stdout and does not
# propagate to the root logger.
logger = logging.getLogger("devops-info-service")
logger.setLevel(logging.INFO)
logger.handlers.clear()  # drop any handlers already attached to this logger

log_handler = logging.StreamHandler(sys.stdout)
# The request-related fields (method, path, client_ip, status_code) are
# supplied per record through the `extra` argument of the logging calls.
formatter = jsonlogger.JsonFormatter(
    "%(asctime)s %(levelname)s %(message)s %(method)s %(path)s %(client_ip)s %(status_code)s"
)
log_handler.setFormatter(formatter)
logger.addHandler(log_handler)
logger.propagate = False

# Prometheus metrics
# Counter of handled HTTP requests, labelled by method, endpoint and status.
HTTP_REQUESTS_TOTAL = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status"],
)

# Histogram of request handling time in seconds, labelled by method and
# endpoint.
HTTP_REQUEST_DURATION_SECONDS = Histogram(
    "http_request_duration_seconds",
    "HTTP request duration in seconds",
    ["method", "endpoint"],
)

# Gauge of requests currently being processed (no labels).
HTTP_REQUESTS_IN_PROGRESS = Gauge(
    "http_requests_in_progress",
    "HTTP requests currently being processed",
)

# Application-specific counter of endpoint calls, labelled by endpoint only.
DEVOPS_INFO_ENDPOINT_CALLS_TOTAL = Counter(
    "devops_info_endpoint_calls_total",
    "Total endpoint calls in DevOps Info Service",
    ["endpoint"],
)

# Histogram of the time spent collecting system information (no labels).
DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS = Histogram(
    "devops_info_system_collection_seconds",
    "System information collection duration in seconds",
)

# Moment the module was loaded, in UTC.
# NOTE(review): presumably the reference point for get_uptime() - confirm.
START_TIME = datetime.now(timezone.utc)
app = FastAPI(title="DevOps Info Service")

# Emit a single startup record. The request-related fields are passed as
# empty placeholders because no HTTP request is associated with this event.
logger.info(
    "Application initialized",
    extra={
        "method": "",
        "path": "",
        "client_ip": "",
        "status_code": "",
    },
)


def get_uptime():
Expand All @@ -38,10 +86,16 @@ def get_uptime():
minutes = (seconds % 3600) // 60
return seconds, f"{hours} hours, {minutes} minutes"

def normalize_endpoint(path: str) -> str:
    """Map a request path onto a fixed set of endpoint names.

    The routes "/", "/health" and "/metrics" are returned unchanged; any
    other path is reported as "other".
    """
    known_routes = ("/", "/health", "/metrics")
    return path if path in known_routes else "other"

def get_system_info():
"""Collect system information."""
return {
start = time.time()

info = {
"hostname": socket.gethostname(),
"platform": platform.system(),
"platform_version": platform.release(),
Expand All @@ -50,12 +104,54 @@ def get_system_info():
"python_version": platform.python_version(),
}

DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS.observe(time.time() - start)
return info


@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log every HTTP request in JSON format and collect Prometheus metrics.

    Args:
        request: The incoming request.
        call_next: Callable that passes the request on and returns the
            response.

    Returns:
        The response produced by ``call_next``, unmodified.

    Raises:
        Any exception raised by ``call_next`` is re-raised after the metrics
        have been recorded.
    """
    endpoint = normalize_endpoint(request.url.path)
    method = request.method

    HTTP_REQUESTS_IN_PROGRESS.inc()
    DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(endpoint=endpoint).inc()

    # If call_next raises, no response object exists; such requests are
    # counted under status "500".
    status = "500"
    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments.
    start_time = time.perf_counter()
    try:
        response = await call_next(request)
        status = str(response.status_code)
    finally:
        # Runs on success and on failure, so the in-progress gauge always
        # returns to its previous value and failed requests are still counted.
        duration = time.perf_counter() - start_time

        HTTP_REQUESTS_TOTAL.labels(
            method=method,
            endpoint=endpoint,
            status=status,
        ).inc()

        HTTP_REQUEST_DURATION_SECONDS.labels(
            method=method,
            endpoint=endpoint,
        ).observe(duration)

        HTTP_REQUESTS_IN_PROGRESS.dec()

    logger.info(
        "HTTP request processed",
        extra={
            "method": method,
            "path": request.url.path,
            # request.client may be None, in which case no address is logged.
            "client_ip": request.client.host if request.client else "",
            "status_code": response.status_code,
        },
    )

    return response


@app.get("/")
async def index(request: Request):
"""Main endpoint returning service and system information."""
logger.info("Handling request to '/'")

uptime_seconds, uptime_human = get_uptime()

return {
Expand All @@ -81,26 +177,38 @@ async def index(request: Request):
"endpoints": [
{"path": "/", "method": "GET", "description": "Service information"},
{"path": "/health", "method": "GET", "description": "Health check"},
{"path": "/metrics", "method": "GET", "description": "Prometheus metrics"},
],
}


@app.get("/health")
async def health():
    """Return the health status, the current UTC timestamp and the uptime."""
    logger.info("Health check requested")

    # Only the numeric part of the uptime is reported here.
    uptime_seconds = get_uptime()[0]
    timestamp = datetime.now(timezone.utc).isoformat()
    return {
        "status": "healthy",
        "timestamp": timestamp,
        "uptime_seconds": uptime_seconds,
    }

@app.get("/metrics")
async def metrics():
    """Expose the metrics rendered by prometheus_client for scraping."""
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)

@app.exception_handler(404)
async def not_found(request: Request, exc):
"""Handle 404 errors."""
logger.warning(
"Endpoint not found",
extra={
"method": request.method,
"path": request.url.path,
"client_ip": request.client.host if request.client else "",
"status_code": 404,
},
)
return JSONResponse(
status_code=404,
content={"error": "Not Found", "message": "Endpoint does not exist"},
Expand All @@ -110,13 +218,29 @@ async def not_found(request: Request, exc):
@app.exception_handler(500)
async def internal_error(request: Request, exc):
    """Handle unexpected server errors.

    Logs one structured error record, including the exception details, and
    returns a generic JSON body so internals are not exposed to the client.

    Args:
        request: The request that triggered the error.
        exc: The exception passed in by the framework.

    Returns:
        A JSONResponse with status code 500.
    """
    logger.error(
        "Internal server error",
        # Attach the exception so its type, message and traceback end up in
        # the same record as the request fields.
        exc_info=exc,
        extra={
            "method": request.method,
            "path": request.url.path,
            "client_ip": request.client.host if request.client else "",
            "status_code": 500,
        },
    )
    return JSONResponse(
        status_code=500,
        content={"error": "Internal Server Error", "message": "An unexpected error occurred"},
    )

# Script entry point: log the startup once and run the server a single time.
if __name__ == "__main__":
    logger.info(
        "Starting server",
        extra={
            # No HTTP request is associated with startup; the request fields
            # are passed as empty placeholders.
            "method": "",
            "path": "",
            "client_ip": "",
            "status_code": "",
            # Keep the bind address visible in the structured record.
            "host": HOST,
            "port": PORT,
        },
    )
    uvicorn.run(app, host=HOST, port=PORT)
2 changes: 2 additions & 0 deletions app_python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
fastapi==0.115.8
uvicorn[standard]==0.32.0
python-json-logger
prometheus-client==0.23.1
130 changes: 130 additions & 0 deletions monitoring/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Monitoring stack: Loki + Promtail (logs), Prometheus (metrics), Grafana
# (dashboards) and the Python application, all on one bridge network.
version: "3.8"

services:
  loki:
    image: grafana/loki:3.0.0
    container_name: loki
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/config.yml
    volumes:
      - ./loki/config.yml:/etc/loki/config.yml:ro
      - loki-data:/loki
    networks:
      - logging
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: "1.0"

  promtail:
    image: grafana/promtail:3.0.0
    container_name: promtail
    command: -config.file=/etc/promtail/config.yml
    ports:
      - "9080:9080"
    volumes:
      - ./promtail/config.yml:/etc/promtail/config.yml:ro
      # Read-only access to the container log files and the Docker socket.
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - logging
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: "0.5"

  grafana:
    image: grafana/grafana:12.3.1
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      - GF_AUTH_ANONYMOUS_ENABLED=false
      - GF_SECURITY_ADMIN_USER=admin
      # Set GRAFANA_ADMIN_PASSWORD in the shell or an .env file to override;
      # the fallback keeps the previous value so existing setups still start.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}
    volumes:
      - grafana-data:/var/lib/grafana
    networks:
      - logging
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: "0.5"

  prometheus:
    image: prom/prometheus:v3.9.0
    container_name: prometheus
    ports:
      - "9090:9090"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.retention.time=15d"
      - "--storage.tsdb.retention.size=10GB"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - logging
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: "1.0"

  app-python:
    image: fayzullin/devops-info-service:latest
    container_name: app-python
    ports:
      # Host port 8000 maps to container port 5000.
      - "8000:5000"
    networks:
      - logging
    labels:
      logging: "promtail"
      app: "devops-python"
      service_name: "devops-python"
    restart: unless-stopped
    healthcheck:
      # NOTE(review): requires curl to be present in the application image - confirm.
      test: ["CMD-SHELL", "curl -f http://localhost:5000/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: "0.5"

volumes:
  loki-data:
  grafana-data:
  prometheus-data:

networks:
  logging:
    driver: bridge
Loading
Loading