|
| 1 | +# syntax = edrevo/dockerfile-plus |
| 2 | +ARG UBUNTU_VERSION |
| 3 | + |
| 4 | +# Build stage |
| 5 | +FROM nvidia/cuda:12.1.1-base-ubuntu${UBUNTU_VERSION}.04 AS builder |
| 6 | +# An ARG declared before FROM is only visible to FROM lines; re-declare it so RUN steps in this stage can expand ${UBUNTU_VERSION} (used in the CUDA repo key URL). |
| 7 | +ARG UBUNTU_VERSION |
| 8 | +ENV NCCL_HOME=/opt/nccl CUDA_HOME=/usr/local/cuda \ |
| 9 | +    OPEN_MPI_PATH=/usr/lib/x86_64-linux-gnu/openmpi |
| 10 | + |
| 11 | +# Prerequisites: fetch the NVIDIA CUDA repo key, upgrade, set the timezone, then install the |
| 12 | +# CUDA dev packages and build tooling (CUDA_VERSION is not set in this file; presumably the base image provides it). |
| 13 | +RUN export DEBIAN_FRONTEND=noninteractive && \ |
| 14 | +    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}04/x86_64/3bf863cc.pub && \ |
| 15 | +    apt-get update --fix-missing && \ |
| 16 | +    apt-get upgrade -y && \ |
| 17 | +    ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime && \ |
| 18 | +    apt-get install -y tzdata && \ |
| 19 | +    dpkg-reconfigure --frontend noninteractive tzdata && \ |
| 20 | +    cuda_pkg_suffix=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') && \ |
| 21 | +    apt-get install -y --no-install-recommends \ |
| 22 | +        autoconf \ |
| 23 | +        automake \ |
| 24 | +        build-essential \ |
| 25 | +        cuda-libraries-dev-${cuda_pkg_suffix} \ |
| 26 | +        cuda-nvcc-${cuda_pkg_suffix} \ |
| 27 | +        curl \ |
| 28 | +        git \ |
| 29 | +        libhwloc-dev \ |
| 30 | +        libopenmpi-dev \ |
| 31 | +        libtool \ |
| 32 | +        python3 |
| 33 | + |
| 34 | +# NCCL: clone the v${NCCL_VERSION} ref and build it with BUILDDIR pointing at ${NCCL_HOME} |
| 35 | + |
| 36 | +ARG NCCL_VERSION=2.26.2-1 |
| 37 | + |
| 38 | +RUN git clone -b v${NCCL_VERSION} https://github.com/NVIDIA/nccl.git /tmp/nccl && \ |
| 39 | +    cd /tmp/nccl && \ |
| 40 | +    make -j$(nproc) src.build \ |
| 41 | +        BUILDDIR=${NCCL_HOME} |
| 42 | + |
| 43 | +# NCCL tests: cloned without a pinned ref (whatever the repository's default branch is) and |
| 44 | +# built with MPI enabled against the NCCL build in ${NCCL_HOME}. |
| 45 | + |
| 46 | +RUN git clone https://github.com/NVIDIA/nccl-tests /opt/nccl-tests && \ |
| 47 | +    cd /opt/nccl-tests && \ |
| 48 | +    make -j$(nproc) \ |
| 49 | +        MPI=1 \ |
| 50 | +        MPI_HOME=${OPEN_MPI_PATH} \ |
| 51 | +        CUDA_HOME=${CUDA_HOME} \ |
| 52 | +        NCCL_HOME=${NCCL_HOME} |
| 53 | + |
| 54 | +# Final stage: the shared definition is pulled in with dockerfile-plus INCLUDE+ (presumably it supplies this stage's FROM; confirm in base/Dockerfile.common) |
| 55 | + |
| 56 | +INCLUDE+ base/Dockerfile.common |
| 57 | + |
| 58 | +# Same NCCL prefix as the builder stage, so the artifacts are copied to identical paths |
| 59 | +ENV NCCL_HOME=/opt/nccl |
| 60 | +COPY --from=builder /opt/nccl-tests/build /opt/nccl-tests/build |
| 61 | +COPY --from=builder ${NCCL_HOME} ${NCCL_HOME} |
| 62 | + |
1 | 63 | ARG FLAVOR |
2 | | -FROM nvidia/cuda:12.1.1-${FLAVOR}-ubuntu20.04 |
3 | | - |
4 | | -ARG PYTHON |
5 | | -ARG _UV_HOME="/opt/uv" |
6 | | -ENV UV_PYTHON="${PYTHON}" |
7 | | -ENV UV_INSTALL_DIR="${_UV_HOME}/bin" |
8 | | -ENV UV_PYTHON_INSTALL_DIR="${_UV_HOME}/python" |
9 | | -ENV UV_PYTHON_BIN_DIR="${UV_PYTHON_INSTALL_DIR}/bin" |
10 | | -ENV UV_MANAGED_PYTHON=1 |
11 | | -ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 |
12 | | - |
13 | | -ENV PATH="${UV_INSTALL_DIR}:${UV_PYTHON_BIN_DIR}:${PATH}" |
14 | | - |
15 | | -RUN export DEBIAN_FRONTEND=noninteractive && \ |
16 | | - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ |
17 | | - apt-get update --fix-missing && \ |
18 | | - apt-get upgrade -y && \ |
19 | | - ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime && \ |
20 | | - apt-get install -y tzdata && \ |
21 | | - dpkg-reconfigure --frontend noninteractive tzdata && \ |
22 | | - apt-get install -y bzip2 ca-certificates curl build-essential git libglib2.0-0 libsm6 libxext6 libxrender1 mercurial openssh-server subversion wget \ |
23 | | - libibverbs1 ibverbs-providers ibverbs-utils libibverbs-dev infiniband-diags && \ |
24 | | - sed -i "s/.*PasswordAuthentication.*/PasswordAuthentication no/g" /etc/ssh/sshd_config && mkdir /run/sshd && \ |
25 | | - mkdir ~/.ssh && chmod 700 ~/.ssh && touch ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && rm /etc/ssh/ssh_host_* |
26 | | - |
27 | | -RUN curl -LsSf https://astral.sh/uv/install.sh | INSTALLER_NO_MODIFY_PATH=1 sh && \ |
28 | | - uv python install --preview --default |
| 64 | + |
| 65 | +# MPI runtime, the CUDA dev toolchain when FLAVOR=devel, and an ld.so.conf.d entry for the NCCL libraries |
| 66 | + |
| 67 | +RUN apt-get update && \ |
| 68 | +    apt-get install -y --no-install-recommends \ |
| 69 | +        openmpi-bin && \ |
| 70 | +    if [ "$FLAVOR" = "devel" ]; then \ |
| 71 | +        cuda_pkg_suffix=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') && \ |
| 72 | +        apt-get install -y --no-install-recommends \ |
| 73 | +            cuda-libraries-dev-${cuda_pkg_suffix} \ |
| 74 | +            cuda-nvcc-${cuda_pkg_suffix} \ |
| 75 | +            libhwloc-dev; \ |
| 76 | +    fi && \ |
| 77 | +    rm -rf /var/lib/apt/lists/* && \ |
| 78 | +    printf '%s\n' "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf && \ |
| 79 | +    ldconfig |
0 commit comments