-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
116 lines (111 loc) · 3.74 KB
/
docker-compose.yml
File metadata and controls
116 lines (111 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Local development stack for the RAG application:
#   - postgres: Postgres 15 with the pgvector extension (vector similarity search)
#   - redis:    broker / cache
#   - api:      GPU-enabled FastAPI service (uvicorn --reload for live dev)
#   - celery_worker: background task worker (currently disabled / commented out)
#
# NOTE(review): DB credentials are hard-coded below (and duplicated inside
# DB_URL) — fine for local dev only; move to .env / secrets before any shared
# or production use.
version: '3.8'  # obsolete in Compose v2+ (ignored with a warning) but harmless

services:
  postgres:
    image: pgvector/pgvector:pg15
    container_name: rag_postgres
    environment:
      POSTGRES_USER: rag_user
      POSTGRES_PASSWORD: rag_password
      POSTGRES_DB: rag_db
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Runs only on first start of an empty data dir (schema bootstrap);
      # wipe the postgres_data volume to re-run it.
      - ./migrations/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U rag_user -d rag_db"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    container_name: rag_redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  api:
    build:
      context: .
      args:
        ENABLE_CUDA: "true"  # Enable CUDA support
    container_name: rag_api
    # Wait for healthy dependencies, not just container start.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      - DB_URL=postgresql+psycopg://rag_user:rag_password@postgres:5432/rag_db
      - REDIS_URL=redis://redis:6379/0
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
      - PYTHONUNBUFFERED=1  # Show Python output immediately in logs
      - GOOGLE_APPLICATION_CREDENTIALS=/secrets/sa.json
      - HF_HOME=/root/.cache/huggingface  # HuggingFace cache directory
      - SENTENCE_TRANSFORMERS_HOME=/root/.cache/huggingface/sentence-transformers  # Sentence transformers cache
      - PIP_CACHE_DIR=/root/.cache/pip  # Pip cache directory for wheel downloads
      - TORCH_HOME=/root/.cache/torch  # PyTorch cache directory
    env_file:
      - .env
    ports:
      - "8000:8000"
    volumes:
      - ./secrets:/secrets:ro
      - ./app:/app/app  # bind-mount source so --reload picks up edits
      - model_cache:/root/.cache  # Cache all models (huggingface, torch, etc)
      - pip_cache:/root/.cache/pip  # Persist pip cache for faster builds
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
    # GPU access via the Compose device-reservation syntax (requires the
    # NVIDIA Container Toolkit on the host). No `count`/`device_ids` set,
    # so all available GPUs are reserved per the Compose spec default.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu, compute, utility]

  # celery_worker:
  #   build:
  #     context: .
  #     args:
  #       ENABLE_CUDA: "true"  # Enable CUDA support
  #   container_name: rag_celery_worker
  #   depends_on:
  #     postgres:
  #       condition: service_healthy
  #     redis:
  #       condition: service_healthy
  #   environment:
  #     - DB_URL=postgresql+psycopg://rag_user:rag_password@postgres:5432/rag_db
  #     - REDIS_URL=redis://redis:6379/0
  #     - NVIDIA_VISIBLE_DEVICES=all
  #     - NVIDIA_DRIVER_CAPABILITIES=all
  #     - PYTHONUNBUFFERED=1  # Show Python output immediately in logs
  #     - GCP_REGION=europe-north1
  #     - GOOGLE_APPLICATION_CREDENTIALS=/secrets/sa.json
  #     - HF_HOME=/root/.cache/huggingface  # HuggingFace cache directory
  #     - SENTENCE_TRANSFORMERS_HOME=/root/.cache/huggingface/sentence-transformers  # Sentence transformers cache
  #     - PIP_CACHE_DIR=/root/.cache/pip  # Pip cache directory for wheel downloads
  #     - TORCH_HOME=/root/.cache/torch  # PyTorch cache directory
  #   env_file:
  #     - .env
  #   volumes:
  #     - ./secrets:/secrets:ro
  #     - ./app:/app/app
  #     - model_cache:/root/.cache  # Cache all models (huggingface, torch, etc)
  #     - pip_cache:/root/.cache/pip  # Persist pip cache for faster builds
  #   command: celery -A app.tasks.celery_app worker --loglevel=info
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             capabilities: [gpu, compute, utility]

volumes:
  postgres_data:
  redis_data:
  model_cache:  # Persistent storage for downloaded ML models
  pip_cache:  # Persistent pip cache for faster Docker builds