-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathDockerfile
More file actions
114 lines (92 loc) · 3.8 KB
/
Dockerfile
File metadata and controls
114 lines (92 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# =============================================================================
# Stage 1: builder
# Installs the build tooling and Python packages needed to assemble the
# application's dependencies.
# =============================================================================
# Declared ahead of the first FROM so that FROM lines in this file can use it.
ARG DOCKER_FROM=nvidia/cuda:12.8.0-runtime-ubuntu22.04
FROM ${DOCKER_FROM} AS builder

# Build-time only: keeps apt from prompting; it is an ARG, so it is not
# persisted into the image environment.
ARG DEBIAN_FRONTEND=noninteractive

# Build dependencies (one per line, alphabetical). The apt package lists are
# removed in the same layer that downloaded them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3 \
        python3-dev \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*
# Python virtual environment under /opt/venv. Putting its bin/ directory first
# on PATH makes `python` and `pip` resolve to the venv in later instructions.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"

# Bring the packaging tools up to date before installing anything else.
RUN python -m pip install --no-cache-dir --upgrade \
        pip \
        setuptools \
        wheel

# PyTorch and torchaudio from the CUDA 12.8 wheel index
# (required for RTX 5090/Blackwell sm_120).
RUN python -m pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu128 \
        torch \
        torchaudio
# Fetch the VibeVoice sources into /build/VibeVoice and install them into the
# active Python environment in editable mode (-e).
# NOTE(review): an editable install refers back to this checkout instead of
# copying the package into site-packages - confirm /build/VibeVoice is still a
# valid path wherever the resulting environment is used.
WORKDIR /build
RUN git clone https://github.com/vibevoice-community/VibeVoice.git /build/VibeVoice \
    && pip install --no-cache-dir -e /build/VibeVoice
# Remaining Python packages, installed in a single pip invocation.
# Listed one per line in alphabetical order to keep diffs small.
RUN pip install --no-cache-dir \
        fastapi \
        huggingface_hub \
        openai-whisper \
        pydub \
        python-magic \
        python-multipart \
        soundfile \
        transformers \
        uvicorn
# Optional: flash-attention from a pre-built wheel (PyTorch 2.10 + CUDA 12.8 + Python 3.10).
# The wheel's tags (cu128, torch2.10, cp310) have to match the installed torch
# build and the Python interpreter; torch is installed without a version pin in
# this stage, so a mismatch is possible.
# The step stays best-effort - the build continues if the wheel cannot be
# installed - but the failure is now reported in the build log instead of being
# silently discarded.
RUN pip install --no-cache-dir https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.12/flash_attn-2.6.3+cu128torch2.10-cp310-cp310-linux_x86_64.whl \
    || echo "WARNING: flash-attn wheel could not be installed; continuing without it" >&2
# =============================================================================
# Stage 2: runtime
# Starts again from the same base image and installs runtime packages only.
# =============================================================================
FROM ${DOCKER_FROM} AS runtime

# Build-time only: keeps apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

LABEL github_repo="https://github.com/vibevoice-community/VibeVoice"

# Runtime packages only (no compiler toolchain), one per line in alphabetical
# order. The apt caches are cleaned in the same layer, and `git lfs install`
# is run once so Git LFS is set up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        git-lfs \
        libmagic1 \
        libsndfile1 \
        libsox-fmt-all \
        python3 \
        python3-pip \
        sox \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install
# Copy the populated virtual environment from the builder stage and put its
# bin/ directory first on PATH.
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy VibeVoice installation to /app (not /workspace which RunPod overwrites)
COPY --from=builder /build/VibeVoice /app/VibeVoice

# VibeVoice was installed into the venv in editable mode from /build/VibeVoice
# in the builder stage, so the copied venv still refers to that path, which
# does not exist in this stage. Link it to the relocated checkout so the
# package stays importable regardless of the working directory.
RUN mkdir -p /build \
    && ln -sfn /app/VibeVoice /build/VibeVoice

ENV SHELL=/bin/bash
# Create necessary directories
# Server files go in /app, models can be in /workspace for RunPod network volume
RUN mkdir -p \
        /app/VibeVoice/server/outputs \
        /app/VibeVoice/server/resources \
        /workspace/models/vibevoice

# Copy our own server files from the build context. COPY overwrites any
# same-named files that came with the cloned repo, so no separate removal
# layer is needed.
COPY server.py start.sh install_models.sh /app/VibeVoice/server/
COPY demo_speaker0.mp3 /app/VibeVoice/server/resources/

# Fix line endings (in case of Windows CRLF) and make the scripts executable.
RUN sed -i 's/\r$//' \
        /app/VibeVoice/server/start.sh \
        /app/VibeVoice/server/install_models.sh \
    && chmod +x \
        /app/VibeVoice/server/start.sh \
        /app/VibeVoice/server/install_models.sh
# Default model and tokenizer locations, both under /workspace/models/vibevoice
# (models can be on network volume).
ENV VIBEVOICE_MODEL_PATH=/workspace/models/vibevoice/VibeVoice-Large \
    VIBEVOICE_TOKENIZER_PATH=/workspace/models/vibevoice/tokenizer

# Run from the server directory.
WORKDIR /app/VibeVoice/server

# Documents the port for operators and tooling; EXPOSE does not publish it.
EXPOSE 7860

# Exec-form entrypoint: bash runs the start script directly.
ENTRYPOINT ["/bin/bash", "/app/VibeVoice/server/start.sh"]