---
# ============================================================================
# KoboldCpp Docker Compose - Charluv Fork
# ============================================================================
#
# ⚠️ IMPORTANT: This is a Charluv-specialized fork of KoboldCpp
#
# Key differences from official KoboldCpp:
# - Modified horde integration for Charluv services
# - Custom API endpoints optimized for Charluv
#
# For official KoboldCpp: https://hub.docker.com/r/koboldai/koboldcpp
# For Charluv: https://charluv.com
#
# Designed for RunPod NVIDIA GPU deployment.
# Requires nvidia-container-toolkit on the host.
#
# NOTE(review): the top-level `version` attribute is obsolete in the Compose
# Specification and is ignored (with a warning) by Docker Compose v2, so it
# has been removed.
# ============================================================================
services:
  koboldcpp:
    # Image is built locally from the repository's Dockerfile.
    build:
      context: .
      dockerfile: Dockerfile
    # Fixed container name; note this prevents running two instances of this
    # service on the same host.
    container_name: koboldcpp
    ports:
      # Quoted so YAML cannot misparse the mapping as a sexagesimal integer.
      - "5001:5001"
    volumes:
      - ./models:/models
      - ./models/loras:/models/loras
      - ./data:/data
    environment:
      # Model: mount a local file or set MODEL_URL to download on first run.
      # Leave KOBOLDCPP_MODEL empty to auto-discover any .gguf in /models.
      - KOBOLDCPP_MODEL=
      # - KOBOLDCPP_MODEL_URL=https://huggingface.co/.../model.gguf
      # - KOBOLDCPP_MODEL_URL_FILENAME=/models/model.gguf
      - KOBOLDCPP_HOST=0.0.0.0
      - KOBOLDCPP_PORT=5001
      - KOBOLDCPP_CONTEXT_SIZE=8192
      # 0 presumably means "auto-detect thread count" — confirm against the
      # image's entrypoint script.
      - KOBOLDCPP_THREADS=0
      - KOBOLDCPP_QUIET=true
      # GPU - always CUDA on RunPod.
      - KOBOLDCPP_USE_GPU=cuda
      # -1 presumably offloads all layers to the GPU — confirm against the
      # entrypoint.
      - KOBOLDCPP_GPU_LAYERS=-1
      - CUDA_VISIBLE_DEVICES=0
      # LoRA (optional) - use URL to download on first run, or pipe-separate
      # local paths.
      - KOBOLDCPP_LORA=
      - KOBOLDCPP_LORA_URL=
      - KOBOLDCPP_LORA_URL_FILENAME=/models/loras/lora.gguf
      - KOBOLDCPP_LORA_MULT=1.0
      - KOBOLDCPP_SDLORA=
      - KOBOLDCPP_SDLORA_URL=
      - KOBOLDCPP_SDLORA_URL_FILENAME=/models/loras/sdlora.safetensors
      - KOBOLDCPP_SDLORA_MULT=1.0
      # Charluv horde worker settings (presumably empty disables horde mode;
      # verify against the fork's horde integration).
      - CHARLUV_HORDE_KEY=
      - CHARLUV_HORDE_WORKER_NAME=
      - CHARLUV_HORDE_MODEL_NAME=
      - CHARLUV_HORDE_MAX_CTX=0
      - CHARLUV_HORDE_GEN_LEN=0
    # Reserve one NVIDIA GPU for the container (requires
    # nvidia-container-toolkit on the host, per the header note).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped