# docker-compose-gpu.yml (branch: main) from the richardr1126/KittenTTS-FastAPI repository on GitHub
services:
  # The only service in this file: the KittenTTS FastAPI container, built
  # from the local Dockerfile with the NVIDIA runtime selected.
  kittentts-fastapi:
    container_name: kittentts-fastapi
    restart: unless-stopped

    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Accepts "nvidia" or "cpu"; nvidia is the default.
        RUNTIME: nvidia

    # The host port is taken from $PORT and falls back to 8005 when it is
    # unset or empty; the container side of the mapping is always 8005.
    ports:
      - "${PORT:-8005}:8005"

    # Additional variables are read from the .env file next to this file.
    env_file:
      - .env

    environment:
      # Turn on faster Hugging Face downloads inside the container.
      HF_HUB_ENABLE_HF_TRANSFER: "1"
      # Expose the NVIDIA GPUs to the container and declare the driver
      # capabilities needed by PyTorch.
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"

    # --- GPU Support (NVIDIA) ---
    # Method 1 (recommended): request the GPU through the 'deploy' key, the
    # modern Docker Compose mechanism. If startup fails with a
    # 'CDI device injection failed' error, comment out this entire 'deploy'
    # section and switch to Method 2 below.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    # Method 2 (legacy, for older setups): uncomment the following line.
    # runtime: nvidia

    # Readiness probe: curl the /health/ready endpoint on port 8005 from
    # inside the container; any curl failure yields exit status 1.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -fsS http://localhost:8005/health/ready || exit 1"
      start_period: 90s
      interval: 30s
      timeout: 5s
      retries: 5