forked from morphik-org/morphik-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdockerfile.colpali
More file actions
124 lines (101 loc) · 3.52 KB
/
dockerfile.colpali
File metadata and controls
124 lines (101 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# syntax=docker/dockerfile:1
# Purpose: Dedicated Docker image for the ColPali Embedding Service (NVIDIA CUDA base image)
# Designed to use flash-attn for a 2x speedup on modern GPUs; flash-attn is currently
# NOT installed (see the note in the builder stage), so the model runs in eager mode
# Compatible with: Ampere (RTX 30xx, A100), Ada (RTX 40xx), Hopper (H100)
# For Turing GPUs (T4, RTX 20xx) use flash-attention-turing instead
# Memory efficient: Single model instance serves both main server and worker
# Build stage - use the NVIDIA CUDA "devel" image so a compiler toolchain and
# the CUDA headers are available during dependency installation
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder

# Install Python 3.11 and the build toolchain.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
#   never block the build on an interactive prompt, without leaking the
#   setting into later layers.
# - --no-install-recommends keeps the layer limited to what is listed here.
# - ca-certificates and make are listed explicitly because they would
#   otherwise only arrive as "recommended" packages of curl/git/cmake/pip.
# - The apt lists are removed in the same layer so they do not bloat it.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        make \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*
# Register Python 3.11 as the target of both the `python` and `python3`
# commands. `set -e` aborts on the first failing registration, so the layer
# fails exactly when any single update-alternatives call fails.
RUN set -e; \
    for cmd in python python3; do \
        update-alternatives --install "/usr/bin/${cmd}" "${cmd}" /usr/bin/python3.11 1; \
    done
# Install uv by copying its binaries out of the official image.
# The tag is pinned to an explicit release instead of :latest so that rebuilds
# are reproducible and a new uv release cannot silently change the build;
# bump the version deliberately when upgrading uv.
COPY --from=ghcr.io/astral-sh/uv:0.8.4 /uv /uvx /bin/
# Every following build step runs from /app
WORKDIR /app

# Build-time environment, declared in a single instruction:
# - UV_LINK_MODE / UV_CACHE_DIR configure uv; UV_CACHE_DIR is also reused as
#   the cache-mount target by the RUN steps that install packages.
# - VIRTUAL_ENV and the PATH prefix make /app/.venv the active environment.
# - CUDA_HOME points at the CUDA installation under /usr/local/cuda.
ENV UV_LINK_MODE=copy \
    UV_CACHE_DIR=/root/.cache/uv \
    VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:${PATH}" \
    CUDA_HOME=/usr/local/cuda
# Create a minimal pyproject.toml declaring the ColPali service dependencies.
# A COPY heredoc writes the content literally, so the result no longer depends
# on the build shell's `echo` expanding "\n" escapes (which breaks as soon as
# the shell is not dash). The quoted delimiter disables variable expansion.
# The destination is relative to the current WORKDIR.
COPY <<'EOF' pyproject.toml
[project]
name = "colpali-service"
version = "1.0.0"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.115.12",
    "uvicorn>=0.34.0",
    "torch>=2.5.1",
    "numpy>=2.2.3",
    "pillow>=11.1.0",
    "pydantic>=2.11.1",
    "httpx>=0.28.1",
    "ninja>=1.11.1",
    "packaging>=24.0",
]
EOF
# Create the virtual environment and install the base dependencies.
# Installing from pyproject.toml (instead of repeating bare package names)
# makes the minimum versions declared there actually apply, and also installs
# ninja and packaging, which are declared in that file but were previously
# never installed. The cache mount keeps uv's download cache out of the layer.
RUN --mount=type=cache,target=${UV_CACHE_DIR} \
    uv venv && \
    uv pip install -r pyproject.toml
# Flash Attention disabled for now - using eager mode
# Works out of the box, ~30-50% slower but stable
# To enable flash-attn later, add proper wheel installation above
# Install colpali-engine directly from its git repository.
# The ref stays pinned to an exact commit for reproducible builds; it is
# exposed as a build argument so it can be bumped with
#   --build-arg COLPALI_ENGINE_REF=<commit>
# without editing this file. The default is the previously hard-coded commit.
# The ARG is declared right before its only use so changing it invalidates
# no earlier layers.
ARG COLPALI_ENGINE_REF=80fb72c9b827ecdb5687a3a8197077d0d01791b3
RUN --mount=type=cache,target=${UV_CACHE_DIR} \
    uv pip install "colpali-engine@git+https://github.com/illuin-tech/colpali@${COLPALI_ENGINE_REF}"
# Production stage - use the NVIDIA CUDA "runtime" image (lightweight, no
# flash-attn). The stage is named so it can be selected with --target.
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime

# Set working directory
WORKDIR /app

# Install Python 3.11 and runtime dependencies.
# - DEBIAN_FRONTEND is set inline so package configuration cannot block the
#   build on a prompt and the setting does not persist into the running image.
# - --no-install-recommends keeps the production image limited to what is
#   listed; ca-certificates is listed explicitly so HTTPS keeps working for
#   curl without relying on a recommended package.
# - NOTE(review): libsm6/libxext6 are presumably needed by an imaging
#   library in the virtual environment - confirm before removing.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libsm6 \
        libxext6 \
        python3.11 \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*
# Register Python 3.11 as the target of both the `python` and `python3`
# commands in the production image. `set -e` aborts on the first failing
# registration, so the layer fails when any update-alternatives call fails.
RUN set -e; \
    for cmd in python python3; do \
        update-alternatives --install "/usr/bin/${cmd}" "${cmd}" /usr/bin/python3.11 1; \
    done
# Bring over the populated virtual environment produced by the builder stage
COPY --from=builder /app/.venv /app/.venv

# Runtime environment, declared in a single instruction:
# - PYTHONUNBUFFERED=1 is set for the Python process started by the entrypoint.
# - VIRTUAL_ENV and the PATH prefix make /app/.venv the active environment,
#   so `python` resolves to the interpreter inside it.
# - CUDA_HOME points at the CUDA installation under /usr/local/cuda.
ENV PYTHONUNBUFFERED=1 \
    VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:${PATH}" \
    CUDA_HOME=/usr/local/cuda

# Add the service module; the relative destination resolves to the WORKDIR
COPY colpali_service.py ./
# Create the startup script.
# It prints a banner and then `exec`s the service so the Python process
# replaces the shell and receives container signals directly. It performs no
# health checks itself.
# A COPY heredoc writes the script literally, so it no longer depends on the
# build shell's `echo` expanding "\n" escapes, and script comments are kept
# instead of being stripped by the Dockerfile parser. --chmod sets the
# executable bit in the same layer; the quoted delimiter disables expansion.
COPY --chmod=755 <<'EOF' /app/entrypoint.sh
#!/bin/bash
set -e

echo "Starting ColPali Embedding Service..."

# Start the service
exec python colpali_service.py
EOF
# OCI image metadata, declared in a single LABEL instruction
LABEL org.opencontainers.image.title="ColPali Embedding Service" \
      org.opencontainers.image.description="Dedicated microservice for ColPali embeddings" \
      org.opencontainers.image.version="1.0.0"

# Document the service port (EXPOSE is informational and publishes nothing).
# NOTE(review): presumably the port colpali_service.py binds to - confirm.
EXPOSE 8001

# Start the container through the startup script (exec form, no extra shell).
# NOTE(review): this stage declares no USER, so the service runs as root, and
# no HEALTHCHECK is defined; consider adding both once the writable cache and
# volume paths and the service's health endpoint are confirmed.
ENTRYPOINT ["/app/entrypoint.sh"]