---
# ============================================================================
# KoboldCpp Docker Compose - Charluv Fork
# ============================================================================
#
# ⚠️ IMPORTANT: This is a Charluv-specialized fork of KoboldCpp
#
# Key differences from official KoboldCpp:
# - Modified horde integration for Charluv services
# - Custom API endpoints optimized for Charluv
#
# For official KoboldCpp: https://hub.docker.com/r/koboldai/koboldcpp
# For Charluv: https://charluv.com
#
# Designed for RunPod NVIDIA GPU deployment.
# Requires nvidia-container-toolkit on the host.
#
# NOTE(review): the top-level `version` attribute is obsolete in the Compose
# Specification and is ignored (with a warning) by Docker Compose v2, so it
# has been removed.
# ============================================================================
services:
  koboldcpp:
    # Image is built locally from the repository's Dockerfile.
    build:
      context: .
      dockerfile: Dockerfile
    # Fixed container name; note this prevents running two instances of this
    # service on the same host.
    container_name: koboldcpp
    ports:
      # Quoted so YAML cannot misparse the mapping as a sexagesimal integer.
      - "5001:5001"
    volumes:
      - ./models:/models
      - ./models/loras:/models/loras
      - ./data:/data
    environment:
      # Model: mount a local file or set MODEL_URL to download on first run.
      # Leave KOBOLDCPP_MODEL empty to auto-discover any .gguf in /models.
      - KOBOLDCPP_MODEL=
      # - KOBOLDCPP_MODEL_URL=https://huggingface.co/.../model.gguf
      # - KOBOLDCPP_MODEL_URL_FILENAME=/models/model.gguf
      - KOBOLDCPP_HOST=0.0.0.0
      - KOBOLDCPP_PORT=5001
      - KOBOLDCPP_CONTEXT_SIZE=8192
      # 0 presumably means "auto-detect thread count" — confirm against the
      # image's entrypoint script.
      - KOBOLDCPP_THREADS=0
      - KOBOLDCPP_QUIET=true
      # GPU - always CUDA on RunPod.
      - KOBOLDCPP_USE_GPU=cuda
      # -1 presumably offloads all layers to the GPU — confirm against the
      # entrypoint.
      - KOBOLDCPP_GPU_LAYERS=-1
      - CUDA_VISIBLE_DEVICES=0
      # LoRA (optional) - use URL to download on first run, or pipe-separate
      # local paths.
      - KOBOLDCPP_LORA=
      - KOBOLDCPP_LORA_URL=
      - KOBOLDCPP_LORA_URL_FILENAME=/models/loras/lora.gguf
      - KOBOLDCPP_LORA_MULT=1.0
      - KOBOLDCPP_SDLORA=
      - KOBOLDCPP_SDLORA_URL=
      - KOBOLDCPP_SDLORA_URL_FILENAME=/models/loras/sdlora.safetensors
      - KOBOLDCPP_SDLORA_MULT=1.0
      # Charluv horde worker settings (presumably empty disables horde mode;
      # verify against the fork's horde integration).
      - CHARLUV_HORDE_KEY=
      - CHARLUV_HORDE_WORKER_NAME=
      - CHARLUV_HORDE_MODEL_NAME=
      - CHARLUV_HORDE_MAX_CTX=0
      - CHARLUV_HORDE_GEN_LEN=0
    # Reserve one NVIDIA GPU for the container (requires
    # nvidia-container-toolkit on the host, per the header note).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped