-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
161 lines (147 loc) · 4.12 KB
/
docker-compose.yml
File metadata and controls
161 lines (147 loc) · 4.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Single user-defined bridge network; every service in this file attaches to
# it, so containers can reach each other by service name.
networks:
  mlops_net:
    driver: bridge
# Named volumes with default driver settings. `{}` is the explicit spelling of
# an empty volume definition.
volumes:
  # Mounted at /mlflow by the mlflow service (SQLite store and artifacts).
  mlflow_data: {}
  # Mounted at /prometheus by the prometheus service.
  prometheus_data: {}
  # Mounted at /var/lib/grafana by the grafana service.
  grafana_data: {}
  # Mounted at /opt/airflow/logs by all three Airflow services.
  airflow_logs: {}
  # Mounted at /var/lib/postgresql/data by the postgres service.
  postgres_data: {}
# Environment shared by the Airflow containers (airflow-init,
# airflow-scheduler, airflow-webserver). Each of them merges it in with
# `<<: *airflow-env`.
x-airflow-env: &airflow-env
  AIRFLOW__CORE__EXECUTOR: LocalExecutor
  # Connection to the `postgres` service; the user, password and database
  # name must stay in sync with that service's POSTGRES_* settings.
  AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://airflow:airflow@postgres/airflow"
  # SECURITY: the fallback key below is committed to version control and is
  # only suitable for local development. Export AIRFLOW_FERNET_KEY (or put it
  # in a `.env` file next to this compose file) to supply a private key.
  # The fallback keeps existing setups working unchanged when it is unset.
  AIRFLOW__CORE__FERNET_KEY: "${AIRFLOW_FERNET_KEY:-46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLt0nfdstY=}"
  # Quoted so the values reach the containers as strings, not YAML booleans.
  AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "false"
  AIRFLOW__CORE__LOAD_EXAMPLES: "false"
  # In-network address of the mlflow service (container port 5000).
  MLFLOW_TRACKING_URI: http://mlflow:5000
  # host:port of the pushgateway service; quoted because of the colon.
  PROMETHEUS_PUSHGATEWAY: "pushgateway:9091"
# Service definitions. All services join the `mlops_net` network.
services:
  # PostgreSQL database targeted by the Airflow SQL_ALCHEMY_CONN setting.
  postgres:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - mlops_net
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U airflow
      interval: 10s
      retries: 5

  # MLflow tracking server. mlflow is pip-installed on every container start,
  # hence the generous healthcheck start_period.
  mlflow:
    image: python:3.11-slim
    entrypoint:
      - /bin/bash
      - -c
      # Folded scalar: the lines below are joined with single spaces into one
      # shell command line.
      - >-
        pip install mlflow==2.13.0 -q &&
        mlflow server
        --host 0.0.0.0
        --port 5000
        --backend-store-uri sqlite:////mlflow/mlflow.db
        --default-artifact-root /mlflow/artifacts
    ports:
      # Host port 5001 -> container port 5000.
      - "5001:5000"
    volumes:
      - mlflow_data:/mlflow
    networks:
      - mlops_net
    healthcheck:
      test:
        - CMD-SHELL
        - "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:5000/health')\""
      interval: 30s
      timeout: 10s
      retries: 10
      start_period: 120s

  # API image built from the Dockerfile in this directory; it is started only
  # once mlflow reports healthy.
  api:
    build:
      context: .
      dockerfile: Dockerfile
    shm_size: 2gb
    environment:
      MLFLOW_TRACKING_URI: http://mlflow:5000
    ports:
      - "8000:8000"
    volumes:
      # Bind-mounts the whole project directory over /app.
      - .:/app
    networks:
      - mlops_net
    depends_on:
      mlflow:
        condition: service_healthy
    healthcheck:
      # NOTE(review): assumes curl is present in the built image - confirm.
      test:
        - CMD-SHELL
        - 'curl -sf http://localhost:8000/health || exit 1'
      interval: 20s
      retries: 5

  # Prometheus with a read-only config file from ./infra and 30-day retention.
  prometheus:
    image: prom/prometheus:v2.50.0
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.retention.time=30d"
    ports:
      - "9090:9090"
    volumes:
      - ./infra/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    networks:
      - mlops_net

  # Pushgateway; the Airflow containers receive its address through
  # PROMETHEUS_PUSHGATEWAY.
  pushgateway:
    image: prom/pushgateway:v1.7.0
    ports:
      - "9091:9091"
    networks:
      - mlops_net

  # Grafana with provisioning files mounted read-only from ./infra.
  grafana:
    image: grafana/grafana:10.3.0
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_USERS_ALLOW_SIGN_UP: "false"
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./infra/grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - mlops_net
    depends_on:
      - prometheus

  # One-shot job: migrates the Airflow database and creates the admin user.
  # The scheduler and webserver wait for it to complete successfully.
  airflow-init:
    image: apache/airflow:2.9.0-python3.11
    entrypoint: /bin/bash
    command:
      - -c
      - |
        airflow db migrate &&
        airflow users create \
          --username admin --password admin \
          --firstname Admin --lastname User \
          --role Admin --email admin@example.com
    environment:
      <<: *airflow-env
    volumes:
      - airflow_logs:/opt/airflow/logs
      - ./pipeline:/opt/airflow/dags
    networks:
      - mlops_net
    depends_on:
      postgres:
        condition: service_healthy

  # Airflow scheduler. In addition to the DAG folder it mounts ./model and
  # ./monitoring under /opt/airflow.
  airflow-scheduler:
    image: apache/airflow:2.9.0-python3.11
    command: scheduler
    environment:
      <<: *airflow-env
    volumes:
      - airflow_logs:/opt/airflow/logs
      - ./pipeline:/opt/airflow/dags
      - ./model:/opt/airflow/model
      - ./monitoring:/opt/airflow/monitoring
    networks:
      - mlops_net
    depends_on:
      airflow-init:
        condition: service_completed_successfully
    restart: unless-stopped

  # Airflow web UI published on host port 8080.
  airflow-webserver:
    image: apache/airflow:2.9.0-python3.11
    command: webserver
    environment:
      <<: *airflow-env
    ports:
      - "8080:8080"
    volumes:
      - airflow_logs:/opt/airflow/logs
      - ./pipeline:/opt/airflow/dags
    networks:
      - mlops_net
    depends_on:
      airflow-init:
        condition: service_completed_successfully
    healthcheck:
      test:
        - CMD-SHELL
        - 'curl -sf http://localhost:8080/health || exit 1'
      interval: 30s
      retries: 5
    restart: unless-stopped