-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
116 lines (111 loc) · 3.74 KB
/
docker-compose.yml
File metadata and controls
116 lines (111 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Local development stack for the RAG application:
#   - postgres: Postgres 15 with the pgvector extension (vector similarity search)
#   - redis:    broker / cache
#   - api:      GPU-enabled FastAPI service (uvicorn --reload for live dev)
#   - celery_worker: background task worker (currently disabled / commented out)
#
# NOTE(review): DB credentials are hard-coded below (and duplicated inside
# DB_URL) — fine for local dev only; move to .env / secrets before any shared
# or production use.
version: '3.8'  # obsolete in Compose v2+ (ignored with a warning) but harmless

services:
  postgres:
    image: pgvector/pgvector:pg15
    container_name: rag_postgres
    environment:
      POSTGRES_USER: rag_user
      POSTGRES_PASSWORD: rag_password
      POSTGRES_DB: rag_db
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Runs only on first start of an empty data dir (schema bootstrap);
      # wipe the postgres_data volume to re-run it.
      - ./migrations/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U rag_user -d rag_db"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    container_name: rag_redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  api:
    build:
      context: .
      args:
        ENABLE_CUDA: "true"  # Enable CUDA support
    container_name: rag_api
    # Wait for healthy dependencies, not just container start.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      - DB_URL=postgresql+psycopg://rag_user:rag_password@postgres:5432/rag_db
      - REDIS_URL=redis://redis:6379/0
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
      - PYTHONUNBUFFERED=1  # Show Python output immediately in logs
      - GOOGLE_APPLICATION_CREDENTIALS=/secrets/sa.json
      - HF_HOME=/root/.cache/huggingface  # HuggingFace cache directory
      - SENTENCE_TRANSFORMERS_HOME=/root/.cache/huggingface/sentence-transformers  # Sentence transformers cache
      - PIP_CACHE_DIR=/root/.cache/pip  # Pip cache directory for wheel downloads
      - TORCH_HOME=/root/.cache/torch  # PyTorch cache directory
    env_file:
      - .env
    ports:
      - "8000:8000"
    volumes:
      - ./secrets:/secrets:ro
      - ./app:/app/app  # bind-mount source so --reload picks up edits
      - model_cache:/root/.cache  # Cache all models (huggingface, torch, etc)
      - pip_cache:/root/.cache/pip  # Persist pip cache for faster builds
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
    # GPU access via the Compose device-reservation syntax (requires the
    # NVIDIA Container Toolkit on the host). No `count`/`device_ids` set,
    # so all available GPUs are reserved per the Compose spec default.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu, compute, utility]

  # celery_worker:
  #   build:
  #     context: .
  #     args:
  #       ENABLE_CUDA: "true"  # Enable CUDA support
  #   container_name: rag_celery_worker
  #   depends_on:
  #     postgres:
  #       condition: service_healthy
  #     redis:
  #       condition: service_healthy
  #   environment:
  #     - DB_URL=postgresql+psycopg://rag_user:rag_password@postgres:5432/rag_db
  #     - REDIS_URL=redis://redis:6379/0
  #     - NVIDIA_VISIBLE_DEVICES=all
  #     - NVIDIA_DRIVER_CAPABILITIES=all
  #     - PYTHONUNBUFFERED=1  # Show Python output immediately in logs
  #     - GCP_REGION=europe-north1
  #     - GOOGLE_APPLICATION_CREDENTIALS=/secrets/sa.json
  #     - HF_HOME=/root/.cache/huggingface  # HuggingFace cache directory
  #     - SENTENCE_TRANSFORMERS_HOME=/root/.cache/huggingface/sentence-transformers  # Sentence transformers cache
  #     - PIP_CACHE_DIR=/root/.cache/pip  # Pip cache directory for wheel downloads
  #     - TORCH_HOME=/root/.cache/torch  # PyTorch cache directory
  #   env_file:
  #     - .env
  #   volumes:
  #     - ./secrets:/secrets:ro
  #     - ./app:/app/app
  #     - model_cache:/root/.cache  # Cache all models (huggingface, torch, etc)
  #     - pip_cache:/root/.cache/pip  # Persist pip cache for faster builds
  #   command: celery -A app.tasks.celery_app worker --loglevel=info
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             capabilities: [gpu, compute, utility]

volumes:
  postgres_data:
  redis_data:
  model_cache:  # Persistent storage for downloaded ML models
  pip_cache:  # Persistent pip cache for faster Docker builds