# docker-compose.prod.yml (from openKinetics/webKinPred) — 155 lines (147 loc), 6.31 KB
# ─────────────────────────────────────────────────────────────────────────────
# Production compose file
#
# Service → Image mapping:
#   backend      → Dockerfile.web (slim Python, no conda)   ~seconds to rebuild
#   celery-beat  → Dockerfile.web (slim Python, no conda)   reuses backend image
#   celery       → Dockerfile (worker runtime; reuses prebuilt webkinpred-envs image)
#   frontend     → frontend/Dockerfile.prod
#
# Use deploy.sh for all deploys — it handles BuildKit flags and prunes dangling
# images automatically so disk space does not accumulate:
#
#   ./deploy.sh               # full prod deploy
#   ./deploy.sh prod celery   # rebuild only the celery worker
# ─────────────────────────────────────────────────────────────────────────────

# ── Reusable fragments ────────────────────────────────────────────────────────
# Shared environment for every Django-aware service (backend, celery, beat).
# Values with numeric defaults are quoted so YAML never retypes them as ints.
x-common-env: &common-env
  DEBUG: "0"
  DJANGO_SETTINGS_MODULE: webKinPred.settings_docker
  # :? makes compose fail fast at startup instead of booting with no secret.
  DJANGO_SECRET_KEY: ${DJANGO_SECRET_KEY:?DJANGO_SECRET_KEY must be set}
  REDIS_HOST: redis
  REDIS_PORT: "6379"
  GPU_EMBED_SERVICE_URL: ${GPU_EMBED_SERVICE_URL:-}
  GPU_EMBED_SERVICE_TOKEN: ${GPU_EMBED_SERVICE_TOKEN:-}
  GPU_EMBED_HEALTH_TTL_SECONDS: "${GPU_EMBED_HEALTH_TTL_SECONDS:-10}"
  GPU_EMBED_JOB_TIMEOUT_SECONDS: "${GPU_EMBED_JOB_TIMEOUT_SECONDS:-1200}"
  GPU_EMBED_FAIL_CLOSED: "${GPU_EMBED_FAIL_CLOSED:-0}"

# Shared build stanza for the lightweight web image (backend + celery-beat).
x-web-build: &web-build
  build:
    context: .
    dockerfile: Dockerfile.web
  image: webkinpred-web:latest

services:
  # ── Redis ───────────────────────────────────────────────────────────────────
  redis:
    image: redis:7-alpine
    volumes:
      - redis_data:/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  # ── Backend (Django / Gunicorn) ─────────────────────────────────────────────
  # Lightweight web image — handles HTTP only, queues tasks to celery.
  # No ML inference here, no conda envs needed.
  backend:
    <<: *web-build
    ports:
      - "8000:8000"
    volumes:
      - ./media:/app/media
      - ./db.sqlite3:/app/db.sqlite3
      - ./staticfiles:/app/staticfiles
      - ./fastas/dbs:/app/fastas/dbs:ro
    environment:
      <<: *common-env
    depends_on:
      redis:
        condition: service_healthy
    restart: unless-stopped
    # Folded scalar (>-): continuation lines stay at the same indent so they
    # fold into one line of spaces handed to sh -c as a single argument.
    command: >-
      sh -c "python manage.py migrate &&
      python manage.py migrate --database=seqmap &&
      python manage.py collectstatic --noinput --clear &&
      gunicorn webKinPred.wsgi:application --bind 0.0.0.0:8000 --workers 2 --worker-class sync --timeout 300 --graceful-timeout 30 --access-logfile - --error-logfile - --capture-output"

  # ── Frontend (nginx + React SPA) ────────────────────────────────────────────
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile.prod
    ports:
      - "3000:80"
    depends_on:
      - backend
    restart: unless-stopped

  # ── Celery worker ───────────────────────────────────────────────────────────
  # Runtime worker image layered on top of prebuilt conda env image.
  # --max-tasks-per-child: worker subprocess is replaced after N tasks,
  # freeing all ML model memory that accumulated during inference.
  celery:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        WEBKINPRED_ENVS_IMAGE: ${WEBKINPRED_ENVS_IMAGE:-webkinpred-envs:latest}
    image: webkinpred-worker:latest
    volumes:
      - ./media:/app/media
      - ./db.sqlite3:/app/db.sqlite3
      - ./media/sequence_info/seqmap.sqlite3:/app/media/sequence_info/seqmap.sqlite3
      - ./staticfiles:/app/staticfiles
      - ./fastas/dbs:/app/fastas/dbs:ro
      # Model weights are mounted read-only — the worker only loads them.
      - ./models/EITLEM/Weights:/app/models/EITLEM/Weights:ro
      - ./models/TurNup/data/saved_models:/app/models/TurNup/data/saved_models:ro
      - ./models/UniKP-main/models:/app/models/UniKP-main/models:ro
      - ./models/DLKcat/DeeplearningApproach:/app/models/DLKcat/DeeplearningApproach:ro
      - ./models/KinForm/results/trained_models:/app/models/KinForm/results/trained_models:ro
      - ./models/CataPro:/app/models/CataPro:ro
      - ./models/CatPred:/app/models/CatPred:ro
    environment:
      <<: *common-env
    depends_on:
      redis:
        condition: service_healthy
      backend:
        condition: service_started
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "celery", "-A", "webKinPred", "inspect", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Model envs take a while to come up; don't count early failures.
      start_period: 60s
    command: >
      celery -A webKinPred worker
      --loglevel=info
      --queues=webkinpred
      --concurrency=1
      --max-tasks-per-child=10

  # ── Celery beat scheduler ───────────────────────────────────────────────────
  # Reuses the lightweight web image — only schedules tasks,
  # no model inference, no conda envs needed.
  celery-beat:
    <<: *web-build
    volumes:
      - ./media:/app/media
      - ./db.sqlite3:/app/db.sqlite3
      - ./media/sequence_info/seqmap.sqlite3:/app/media/sequence_info/seqmap.sqlite3
    environment:
      <<: *common-env
    depends_on:
      redis:
        condition: service_healthy
      backend:
        condition: service_started
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "pgrep", "-f", "celery.*beat"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    command: celery -A webKinPred beat --loglevel=info

volumes:
  redis_data: