Skip to content

Commit cdce99c

Browse files
lab07 submission
1 parent a3d1763 commit cdce99c

12 files changed

Lines changed: 367 additions & 7 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
test
22
.secrets
3+
monitoring/.env

app_python/app.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,94 @@
55
import socket
66
from datetime import datetime, timezone
77
import logging
8+
import json
9+
import time
10+
import uuid
11+
from contextlib import asynccontextmanager
812

913
HOST = os.getenv('HOST', '0.0.0.0')
1014
PORT = int(os.getenv('PORT', 5000))
1115
DEBUG = os.getenv('DEBUG', 'False').lower() == 'true'
1216

13-
logging.basicConfig(
14-
level=logging.INFO if not DEBUG else logging.DEBUG,
15-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
16-
)
17-
logger = logging.getLogger(__name__)
1817

18+
class JsonFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object.

    The object always carries ``timestamp``, ``level``, ``message``, ``app``
    and ``logger``. If the record has an ``extra_info`` mapping (attached via
    ``logger.info(..., extra={"extra_info": {...}})``), its keys are merged
    into the top level of the object and may override the base keys.
    """

    def format(self, record):
        """Serialize ``record`` to a JSON string.

        Args:
            record: The ``logging.LogRecord`` to serialize.

        Returns:
            A JSON document describing the record.
        """
        log_record = {
            # Use the moment the event was logged (record.created), not the
            # moment the handler happens to format it.
            "timestamp": datetime.fromtimestamp(
                record.created, tz=timezone.utc
            ).isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "app": "devops-python",
            "logger": record.name,
        }

        # Skip a missing or empty/None extra_info instead of crashing in update().
        extra_info = getattr(record, "extra_info", None)
        if extra_info:
            log_record.update(extra_info)

        # Keep the traceback when the record was logged with exc_info set.
        if record.exc_info:
            log_record["exception"] = self.formatException(record.exc_info)

        # default=str keeps logging alive when an extra value (datetime, UUID,
        # exception, ...) is not natively JSON-serializable.
        return json.dumps(log_record, default=str)
30+
31+
32+
# Application logger: every record is rendered as JSON by JsonFormatter and
# written through a dedicated stream handler.
logger = logging.getLogger("app")
# Honour the DEBUG flag so logger.debug(...) calls are emitted in debug mode
# (a hard-coded INFO level silently discards them).
logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
# logging.getLogger returns a process-wide singleton; attach the handler only
# once so that running this module body again does not duplicate every line.
if not logger.handlers:
    logger.addHandler(handler)
# Do not forward records to the root logger, which would print them again.
logger.propagate = False
1938

2039
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log application startup and shutdown.

    Code before ``yield`` runs once before the application starts serving
    requests; code after it runs once on shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to.
    """
    startup_config = {
        "version": "1.0.0",
        "mode": "production",
        # Report the level the logger actually uses rather than a constant.
        "log_level": logging.getLevelName(logger.getEffectiveLevel()),
    }
    logger.info(
        "Application starting up",
        extra={"extra_info": {"config": startup_config}},
    )

    yield

    logger.info("Application shutting down")


# The lifespan handler only runs if it is handed to the application; merely
# defining it leaves the startup/shutdown logs unreachable.
app = FastAPI(lifespan=lifespan)
start_time = datetime.now()


@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Emit one structured log line per HTTP request.

    Each line carries a generated request id, the client IP, method, path,
    status code and duration in milliseconds.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request and returns the response.

    Returns:
        The response produced by ``call_next``, unchanged.

    Raises:
        Exception: Any exception raised while handling the request is logged
            with status code 500 and re-raised unchanged.
    """
    request_id = str(uuid.uuid4())
    # Monotonic clock: immune to wall-clock adjustments. The local name also
    # avoids shadowing the module-level ``start_time``.
    started = time.perf_counter()

    log_context = {
        "request_id": request_id,
        "client_ip": request.client.host if request.client else "unknown",
        "method": request.method,
        "path": request.url.path,
    }

    try:
        response = await call_next(request)
    except Exception as exc:
        log_context.update({
            "status_code": 500,
            "duration_ms": int((time.perf_counter() - started) * 1000),
            "error": str(exc),
        })
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Request failed: {str(exc)}",
                         extra={"extra_info": log_context})
        # Bare raise re-raises the active exception with its original traceback.
        raise

    log_context.update({
        "status_code": response.status_code,
        "duration_ms": int((time.perf_counter() - started) * 1000),
    })
    logger.info(f"Request handled: {request.method} {request.url.path}",
                extra={"extra_info": log_context})

    return response
94+
95+
2496
@app.get("/")
2597
def read_root(request: Request):
2698
logger.debug(f'Request: {request.method} {request.url}')
@@ -94,8 +166,6 @@ async def not_found(request, exc):
94166
}
95167
)
96168

97-
# Custom error handler for 500 Internal Server Error
98-
99169

100170
@app.exception_handler(500)
101171
async def internal_error(request, exc):

monitoring/docker-compose.yml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
services:
2+
app-python:
3+
build:
4+
context: ../app_python
5+
ports:
6+
- "8000:5000"
7+
networks:
8+
- logging
9+
labels:
10+
logging: "promtail"
11+
app: "devops-python"
12+
deploy:
13+
resources:
14+
limits:
15+
cpus: '0.5'
16+
memory: 256M
17+
healthcheck:
18+
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health').read()"]
19+
interval: 30s
20+
timeout: 10s
21+
retries: 3
22+
23+
loki:
24+
image: grafana/loki:3.0.0
25+
container_name: loki
26+
ports:
27+
- "3100:3100"
28+
volumes:
29+
- ./loki/config.yml:/etc/loki/config.yml
30+
- loki-data:/loki
31+
command: -config.file=/etc/loki/config.yml
32+
networks:
33+
- logging
34+
deploy:
35+
resources:
36+
limits:
37+
cpus: '1.0'
38+
memory: 512M
39+
healthcheck:
40+
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"]
41+
interval: 10s
42+
timeout: 5s
43+
retries: 5
44+
45+
promtail:
46+
image: grafana/promtail:3.0.0
47+
container_name: promtail
48+
ports:
49+
- 9080:9080
50+
volumes:
51+
- ./promtail/config.yml:/etc/promtail/config.yml
52+
- /var/run/docker.sock:/var/run/docker.sock:ro
53+
- /var/lib/docker/containers:/var/lib/docker/containers:ro
54+
command: -config.file=/etc/promtail/config.yml
55+
networks:
56+
- logging
57+
deploy:
58+
resources:
59+
limits:
60+
cpus: '0.5'
61+
memory: 256M
62+
63+
grafana:
64+
image: grafana/grafana:12.3.1
65+
container_name: grafana
66+
ports:
67+
- "3000:3000"
68+
environment:
69+
- GF_AUTH_ANONYMOUS_ENABLED=false
70+
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
71+
- GF_SECURITY_ALLOW_EMBEDDING=true
72+
volumes:
73+
- grafana-data:/var/lib/grafana
74+
networks:
75+
- logging
76+
deploy:
77+
resources:
78+
limits:
79+
cpus: '0.5'
80+
memory: 512M
81+
healthcheck:
82+
test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"]
83+
interval: 30s
84+
timeout: 10s
85+
retries: 3
86+
87+
volumes:
88+
loki-data:
89+
grafana-data:
90+
91+
networks:
92+
logging:

monitoring/docs/LAB07.md

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# Lab 07
2+
3+
## 1. Architecture
4+
5+
The monitoring stack follows a "Push" architecture where logs are collected at the source and shipped to a central aggregator.
6+
7+
* **FastAPI App**: Generates structured JSON logs.
8+
* **Promtail**: Scrapes logs from the Docker socket, filters by labels, and pushes to Loki.
9+
* **Loki 3.0**: Stores log chunks and indexes metadata using the TSDB engine.
10+
* **Grafana**: Visualizes logs and converts log streams into metrics via LogQL.
11+
12+
## 2. Setup Guide
13+
14+
1. **Environment**: Create a `.env` file with `GRAFANA_ADMIN_PASSWORD`.
15+
2. **Deployment**:
16+
17+
```bash
18+
cd monitoring
19+
docker compose up -d
20+
21+
```
22+
23+
3. **Verification**: Open Grafana at `http://localhost:3000` and log in as `admin` with the password from your `.env`.
24+
25+
## 3. Configuration
26+
27+
### Loki 3.0 (TSDB & Retention)
28+
29+
I utilized the new `common` block and `tsdb` shipping to optimize storage for Loki 3.0.
30+
31+
```yaml
32+
schema_config:
33+
configs:
34+
- from: 2024-01-01
35+
store: tsdb
36+
object_store: filesystem
37+
schema: v13
38+
39+
```
40+
41+
* **Why**: TSDB is significantly faster than the older Boltdb-shipper and is the recommended engine for version 3.0.
42+
43+
### Promtail (Filtering)
44+
45+
Promtail is configured to only collect logs from containers with specific Docker labels.
46+
47+
```yaml
48+
- source_labels: ['__meta_docker_container_label_logging']
49+
regex: 'promtail'
50+
action: keep
51+
52+
```
53+
54+
* **Why**: This prevents "log spam" from system containers and ensures I only monitor what I explicitly label.
55+
56+
## 4. Application Logging
57+
58+
I implemented structured logging using a custom `JsonFormatter` and FastAPI **Lifespan** events.
59+
60+
**Implementation Snippet:**
61+
62+
```python
63+
class JsonFormatter(logging.Formatter):
64+
def format(self, record):
65+
log_record = {
66+
"timestamp": datetime.now(timezone.utc).isoformat(),
67+
"level": record.levelname,
68+
"message": record.getMessage(),
69+
"app": "devops-python",
70+
"logger": record.name
71+
}
72+
if hasattr(record, "extra_info"):
73+
log_record.update(record.extra_info)
74+
return json.dumps(log_record)
75+
76+
handler = logging.StreamHandler()
77+
handler.setFormatter(JsonFormatter())
78+
logger.addHandler(handler)
79+
80+
```
81+
82+
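Because each record is written as a single JSON line to the container's output stream (`logging.StreamHandler()` defaults to `stderr`, which Docker captures alongside `stdout`), Promtail ships the entire object, allowing us to use the `| json` parser in LogQL.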
83+
84+
## 5. Dashboard & LogQL
85+
86+
| Panel | Query | Explanation |
87+
| --- | --- | --- |
88+
| Logs Table | `{app=~"devops-.*"}` | Shows the raw log stream for all related apps. |
89+
| **Request Rate** | `sum by (app) (rate({app=~"devops-.*"} [1m]))` | Converts log lines into a "Requests Per Second" metric. |
90+
| **Error Logs** | `{app=~"devops-.*"} \| json \| level="ERROR"` | Filters JSON objects where the level key is specifically ERROR. |
91+
| **Status Codes** | `sum by (status_code) (count_over_time({app=~"devops-.*"} \| json [5m]))` | A pie chart showing the distribution of HTTP response codes. |
92+
93+
![alt text](<image copy 3.png>)
94+
95+
## 6. Production Configuration
96+
97+
* **Security**: Anonymous access is disabled (`GF_AUTH_ANONYMOUS_ENABLED=false`).
98+
* **Resources**:
99+
* Loki: 1.0 CPU, 512MB RAM.
100+
* Grafana: 0.5 CPU, 512MB RAM.
* App/Promtail: 0.5 CPU, 256MB RAM.
101+
102+
* **Health Checks**: Defined in `docker-compose.yml` using `/health` (App), `/ready` (Loki) and `/api/health` (Grafana). Promtail has no health check.
103+
104+
## 7. Testing
105+
106+
| Component | Command | Expected Result |
107+
| --- | --- | --- |
108+
| **Stack Status** | `docker compose ps` | All containers `Up`; app, Loki and Grafana show `(healthy)` |
109+
| **Loki API** | `curl http://localhost:3100/ready` | `ready` |
110+
| **Promtail UI** | `curl http://localhost:9080/targets` | List of active containers |
111+
112+
## 8. Evidence
113+
114+
### Task 1
115+
116+
![alt text](image.png)
117+
118+
### Task 2
119+
120+
![alt text](<image copy.png>)
121+
122+
![alt text](<image copy 2.png>)
123+
124+
### Task 3
125+
126+
![alt text](<image copy 3.png>)
127+
128+
### Task 4
129+
130+
![alt text](image-1.png)
131+
![alt text](image-2.png)

monitoring/docs/image copy 2.png

153 KB
Loading

monitoring/docs/image copy 3.png

162 KB
Loading

monitoring/docs/image copy.png

161 KB
Loading

monitoring/docs/image-1.png

230 KB
Loading

monitoring/docs/image-2.png

893 KB
Loading

monitoring/docs/image.png

160 KB
Loading

0 commit comments

Comments
 (0)