Skip to content

Commit bef2d1f

Browse files
committed
Lab08
1 parent 1479457 commit bef2d1f

31 files changed

Lines changed: 1525 additions & 112 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,4 @@ __pycache__/
4444
# Monitoring
4545
monitoring/.env
4646
!monitoring/grafana/dashboards/lab07-logs-dashboard.json
47+
!monitoring/grafana/dashboards/lab08-metrics-dashboard.json

Lab-1/app_python/app.py

Lines changed: 102 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,43 @@
99
from datetime import datetime, timezone
1010

1111
from dotenv import load_dotenv
12-
from flask import Flask, g, jsonify, request
12+
from flask import Flask, Response, g, jsonify, request
1313
from flask_swagger_ui import get_swaggerui_blueprint
14+
from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest
1415

1516
app = Flask(__name__)
1617

18+
# Route paths that normalize_endpoint() passes through unchanged as the
# `endpoint` label value.
TRACKED_ENDPOINTS = {
    '/',
    '/health',
    '/metrics',
    '/swagger.json',
}

# Request counter: one series per method / endpoint / response status code.
HTTP_REQUESTS_TOTAL = Counter(
    'http_requests_total',
    'Total HTTP requests',
    labelnames=('method', 'endpoint', 'status_code'),
)

# Request latency distribution, observed in seconds.
HTTP_REQUEST_DURATION_SECONDS = Histogram(
    'http_request_duration_seconds',
    'HTTP request duration in seconds',
    labelnames=('method', 'endpoint'),
)

# Requests that have started but not yet produced a response.
# NOTE(review): the request hooks in this file always set `status_code` to the
# constant 'in_progress' for this gauge, so the label carries no information;
# dropping it would need a matching change at both `.labels(...)` call sites.
HTTP_REQUESTS_IN_PROGRESS = Gauge(
    'http_requests_in_progress',
    'HTTP requests currently being processed',
    labelnames=('method', 'endpoint', 'status_code'),
)

# Application-level call counter, incremented inside each route handler.
DEVOPS_INFO_ENDPOINT_CALLS_TOTAL = Counter(
    'devops_info_endpoint_calls_total',
    'Total endpoint calls for DevOps info service',
    labelnames=('endpoint',),
)

# Time spent gathering system information (unlabelled histogram).
DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS = Histogram(
    'devops_info_system_collection_seconds',
    'System info collection duration in seconds',
)
48+
1749

1850
class JSONFormatter(logging.Formatter):
1951
def format(self, record: logging.LogRecord) -> str:
@@ -77,6 +109,17 @@ def get_client_ip() -> str:
77109
return client_ip
78110

79111

112+
def normalize_endpoint() -> str:
    """Return a bounded endpoint label for the current request.

    The matched route's rule string is preferred; when the request has no
    rule (or the rule is empty) the raw request path is used instead. Any
    value beginning with ``/docs`` collapses to ``/docs``, values listed in
    ``TRACKED_ENDPOINTS`` are returned unchanged, and everything else is
    reported as ``/other``.
    """
    rule = getattr(request, 'url_rule', None)
    if rule and rule.rule:
        path = rule.rule
    else:
        path = request.path

    if path.startswith('/docs'):
        return '/docs'
    return path if path in TRACKED_ENDPOINTS else '/other'
121+
122+
80123
# Configuration: load .env, then read settings from the environment
81124
load_dotenv()
82125
HOST = os.getenv('HOST', '0.0.0.0')
@@ -168,7 +211,8 @@ def get_endpoints() -> list[dict]:
168211
"""return a list of available endpoints"""
169212
return [
170213
{'path': '/', 'method': 'GET', 'description': 'Service information'},
171-
{'path': '/health', 'method': 'GET', 'description': 'Health check'}
214+
{'path': '/health', 'method': 'GET', 'description': 'Health check'},
215+
{'path': '/metrics', 'method': 'GET', 'description': 'Prometheus metrics'},
172216
]
173217

174218

@@ -200,6 +244,16 @@ def get_endpoints() -> list[dict]:
200244
}
201245
}
202246
}
247+
},
248+
'/metrics': {
249+
'get': {
250+
'summary': 'Prometheus metrics',
251+
'responses': {
252+
'200': {
253+
'description': 'Prometheus text exposition format'
254+
}
255+
}
256+
}
203257
}
204258
}
205259
}
@@ -208,6 +262,14 @@ def get_endpoints() -> list[dict]:
208262
@app.before_request
209263
def log_request() -> None:
210264
g.request_started_at = time.perf_counter()
265+
g.normalized_endpoint = normalize_endpoint()
266+
267+
HTTP_REQUESTS_IN_PROGRESS.labels(
268+
method=request.method,
269+
endpoint=g.normalized_endpoint,
270+
status_code='in_progress',
271+
).inc()
272+
211273
logger.info(
212274
'Incoming request',
213275
extra={
@@ -223,9 +285,32 @@ def log_request() -> None:
223285
@app.after_request
224286
def log_response(response):
225287
started_at = getattr(g, 'request_started_at', None)
288+
duration_seconds = None
226289
duration_ms = None
227290
if started_at is not None:
228-
duration_ms = round((time.perf_counter() - started_at) * 1000, 2)
291+
duration_seconds = time.perf_counter() - started_at
292+
duration_ms = round(duration_seconds * 1000, 2)
293+
294+
endpoint = getattr(g, 'normalized_endpoint', normalize_endpoint())
295+
status_code = str(response.status_code)
296+
297+
HTTP_REQUESTS_TOTAL.labels(
298+
method=request.method,
299+
endpoint=endpoint,
300+
status_code=status_code,
301+
).inc()
302+
303+
if duration_seconds is not None:
304+
HTTP_REQUEST_DURATION_SECONDS.labels(
305+
method=request.method,
306+
endpoint=endpoint,
307+
).observe(duration_seconds)
308+
309+
HTTP_REQUESTS_IN_PROGRESS.labels(
310+
method=request.method,
311+
endpoint=endpoint,
312+
status_code='in_progress',
313+
).dec()
229314

230315
log_extra: dict[str, object] = {
231316
'event': 'request_end',
@@ -248,10 +333,15 @@ def log_response(response):
248333
@app.route('/')
249334
def index():
250335
"""main endpoint"""
336+
DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(endpoint='/').inc()
337+
251338
uptime = get_uptime()
339+
with DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS.time():
340+
system_info = get_system_info()
341+
252342
payload = {
253343
'service': get_service_info(),
254-
'system': get_system_info(),
344+
'system': system_info,
255345
'runtime': {
256346
'uptime_seconds': uptime['seconds'],
257347
'uptime_human': uptime['human'],
@@ -267,6 +357,7 @@ def index():
267357
@app.route('/health')
268358
def health():
269359
"""health check endpoint"""
360+
DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(endpoint='/health').inc()
270361
uptime = get_uptime()
271362
return jsonify({
272363
'status': 'healthy',
@@ -275,8 +366,15 @@ def health():
275366
})
276367

277368

369+
@app.route('/metrics')
def metrics():
    """prometheus metrics endpoint

    Counts the call, then returns the current contents of the default
    registry in the Prometheus text exposition format.
    """
    DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(endpoint='/metrics').inc()
    # CONTENT_TYPE_LATEST is a complete header value that already includes the
    # version and charset parameters, so it must be passed as `content_type`.
    # Passing it as `mimetype` makes Werkzeug append a second
    # "; charset=utf-8" to the Content-Type header.
    return Response(generate_latest(), content_type=CONTENT_TYPE_LATEST)
373+
374+
278375
@app.route('/swagger.json')
def swagger_json():
    """openapi spec endpoint"""
    # Count the call before serialising the spec.
    call_counter = DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(
        endpoint='/swagger.json',
    )
    call_counter.inc()
    return jsonify(OPENAPI_SPEC)
281379

282380

Lab-1/app_python/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ Flask==3.1.0
44
flask-swagger-ui==4.11.1
55
# Env support
66
python-dotenv==1.0.1
7+
# Metrics
8+
prometheus-client==0.23.1

Lab-1/app_python/tests/test_app.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,21 @@ def boom():
7272
"error": "Internal Server Error",
7373
"message": "An unexpected error occurred",
7474
}
75+
76+
77+
def test_metrics_endpoint_exposes_prometheus_metrics(client):
    """/metrics answers 200 text/plain and contains each expected metric name."""
    # Hit the instrumented routes first so labelled series exist.
    for path in ("/", "/health"):
        client.get(path)

    response = client.get("/metrics")

    assert response.status_code == 200
    assert response.content_type.startswith("text/plain")

    exposition = response.get_data(as_text=True)
    expected_names = (
        "http_requests_total",
        "http_request_duration_seconds_bucket",
        "http_requests_in_progress",
        "devops_info_endpoint_calls_total",
        "devops_info_system_collection_seconds_bucket",
    )
    for metric_name in expected_names:
        assert metric_name in exposition

ansible/group_vars/all.yml.example

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ compose_project_dir: "/opt/{{ app_name }}"
1717
# Wipe safety switch (used with --tags web_app_wipe)
1818
web_app_wipe: false
1919

20-
# Monitoring role (Lab07 bonus)
20+
# Monitoring role (Lab07-Lab08 bonus)
2121
monitoring_grafana_admin_user: "admin"
2222
monitoring_grafana_admin_password: "replace-with-strong-password"
2323
# monitoring_project_dir: "/opt/monitoring"
2424
# monitoring_app_enabled: true
25+
# monitoring_prometheus_retention_time: "15d"
26+
# monitoring_prometheus_retention_size: "10GB"

ansible/roles/monitoring/defaults/main.yml

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ monitoring_project_dir: /opt/monitoring
66
monitoring_loki_version: "3.0.0"
77
monitoring_promtail_version: "3.0.0"
88
monitoring_grafana_version: "12.3.1"
9+
monitoring_prometheus_version: "v3.9.0"
910

1011
# Ports
1112
monitoring_loki_port: 3100
1213
monitoring_promtail_port: 9080
1314
monitoring_grafana_port: 3000
15+
monitoring_prometheus_port: 9090
1416
monitoring_app_host_port: 8000
1517
monitoring_app_container_port: 5000
1618

@@ -19,17 +21,45 @@ monitoring_loki_schema_from: "2024-01-01"
1921
monitoring_loki_schema_version: "v13"
2022
monitoring_loki_retention_period: "168h"
2123

24+
# Prometheus settings
25+
monitoring_prometheus_retention_time: "15d"
26+
monitoring_prometheus_retention_size: "10GB"
27+
monitoring_prometheus_scrape_interval: "15s"
28+
monitoring_prometheus_evaluation_interval: "15s"
29+
monitoring_prometheus_targets:
30+
- job: "prometheus"
31+
targets:
32+
- "localhost:9090"
33+
- job: "app"
34+
targets:
35+
- "{{ monitoring_app_name }}:{{ monitoring_app_container_port }}"
36+
path: "/metrics"
37+
- job: "loki"
38+
targets:
39+
- "loki:3100"
40+
path: "/metrics"
41+
- job: "grafana"
42+
targets:
43+
- "grafana:3000"
44+
path: "/metrics"
45+
- job: "promtail"
46+
targets:
47+
- "promtail:9080"
48+
path: "/metrics"
49+
2250
# App integration
2351
monitoring_app_enabled: true
2452
monitoring_app_name: "devops-python"
2553
monitoring_app_image: "{{ dockerhub_username | default('linktur') }}/devops-lab2"
2654
monitoring_app_tag: "{{ docker_image_tag | default('latest') }}"
2755

28-
# Grafana security
56+
# Grafana security and provisioning
2957
monitoring_grafana_admin_user: "admin"
30-
monitoring_grafana_admin_password: "ChangeMe_Lab07_Replace"
31-
monitoring_grafana_datasource_uid: "loki"
32-
monitoring_grafana_dashboard_uid: "lab07-logs"
58+
monitoring_grafana_admin_password: "ChangeMe_Lab08_Replace"
59+
monitoring_grafana_loki_datasource_uid: "loki"
60+
monitoring_grafana_prometheus_datasource_uid: "prometheus"
61+
monitoring_grafana_logs_dashboard_uid: "lab07-logs"
62+
monitoring_grafana_metrics_dashboard_uid: "lab08-metrics"
3363

3464
# Compose behavior
3565
monitoring_compose_pull_policy: "always"
@@ -48,21 +78,28 @@ monitoring_resources:
4878
promtail:
4979
limits:
5080
cpus: "0.5"
51-
memory: "512M"
81+
memory: "256M"
5282
reservations:
5383
cpus: "0.10"
5484
memory: "128M"
55-
grafana:
85+
prometheus:
5686
limits:
5787
cpus: "1.0"
5888
memory: "1G"
5989
reservations:
6090
cpus: "0.25"
6191
memory: "256M"
62-
app:
92+
grafana:
6393
limits:
6494
cpus: "0.5"
6595
memory: "512M"
96+
reservations:
97+
cpus: "0.25"
98+
memory: "256M"
99+
app:
100+
limits:
101+
cpus: "0.5"
102+
memory: "256M"
66103
reservations:
67104
cpus: "0.10"
68105
memory: "128M"

ansible/roles/monitoring/tasks/deploy.yml

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@
2525
delay: 2
2626
timeout: "{{ monitoring_wait_timeout }}"
2727

28+
- name: Wait for Prometheus port
29+
ansible.builtin.wait_for:
30+
host: "127.0.0.1"
31+
port: "{{ monitoring_prometheus_port }}"
32+
delay: 2
33+
timeout: "{{ monitoring_wait_timeout }}"
34+
2835
- name: Wait for Grafana port
2936
ansible.builtin.wait_for:
3037
host: "127.0.0.1"
@@ -50,6 +57,16 @@
5057
delay: 3
5158
until: monitoring_loki_ready.status == 200
5259

60+
- name: Verify Prometheus healthy endpoint
61+
ansible.builtin.uri:
62+
url: "http://127.0.0.1:{{ monitoring_prometheus_port }}/-/healthy"
63+
method: GET
64+
status_code: 200
65+
register: monitoring_prometheus_ready
66+
retries: 20
67+
delay: 3
68+
until: monitoring_prometheus_ready.status == 200
69+
5370
- name: Verify Grafana health endpoint
5471
ansible.builtin.uri:
5572
url: "http://127.0.0.1:{{ monitoring_grafana_port }}/api/health"
@@ -64,7 +81,7 @@
6481
ansible.builtin.uri:
6582
url: >-
6683
http://127.0.0.1:{{ monitoring_grafana_port }}/api/datasources/uid/{{
67-
monitoring_grafana_datasource_uid }}
84+
monitoring_grafana_loki_datasource_uid }}
6885
method: GET
6986
user: "{{ monitoring_grafana_admin_user }}"
7087
password: "{{ monitoring_grafana_admin_password }}"
@@ -74,3 +91,18 @@
7491
retries: 20
7592
delay: 3
7693
until: monitoring_loki_datasource.status == 200
94+
95+
- name: Verify Prometheus datasource in Grafana
96+
ansible.builtin.uri:
97+
url: >-
98+
http://127.0.0.1:{{ monitoring_grafana_port }}/api/datasources/uid/{{
99+
monitoring_grafana_prometheus_datasource_uid }}
100+
method: GET
101+
user: "{{ monitoring_grafana_admin_user }}"
102+
password: "{{ monitoring_grafana_admin_password }}"
103+
force_basic_auth: true
104+
status_code: 200
105+
register: monitoring_prometheus_datasource
106+
retries: 20
107+
delay: 3
108+
until: monitoring_prometheus_datasource.status == 200

0 commit comments

Comments
 (0)