-
Notifications
You must be signed in to change notification settings - Fork 221
Rework default Docker images #2799
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5290d98
53a717d
e412313
22b2530
2891ccb
5b0539f
a4e6d29
b102ddb
ee05d46
eac604a
0172ab7
8418ad7
b5710a0
f46afe8
59edf64
76ed3bc
0e36c85
6225135
dec318f
5a34a9c
562266f
2fd1bdf
dbb3545
bcfc856
fc5a8eb
1902cde
709b63c
312a9ee
2b58f67
8ae1be6
a24bd1a
2946a20
9903d01
8b796bb
305e5f1
46a7d51
0cbf5b9
0105f70
31dfd39
2358683
27520eb
de67511
bdaa059
035eced
57ab13d
8acfff9
1b1c02b
78dc094
1367167
6d5ceb2
3894ba9
a646ebc
48b4b9d
d8d755c
49b7cb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,28 +1,79 @@ | ||
| # syntax = edrevo/dockerfile-plus | ||
| ARG UBUNTU_VERSION | ||
|
|
||
| # Build stage | ||
| FROM nvidia/cuda:12.1.1-base-ubuntu${UBUNTU_VERSION}.04 AS builder | ||
|
|
||
| ENV NCCL_HOME=/opt/nccl | ||
| ENV CUDA_HOME=/usr/local/cuda | ||
| ENV OPEN_MPI_PATH=/usr/lib/x86_64-linux-gnu/openmpi | ||
|
|
||
| # Prerequisites | ||
|
|
||
| RUN export DEBIAN_FRONTEND=noninteractive \ | ||
| && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}04/x86_64/3bf863cc.pub \ | ||
| && apt-get update --fix-missing \ | ||
| && apt-get upgrade -y \ | ||
| && ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime \ | ||
| && apt-get install -y tzdata \ | ||
| && dpkg-reconfigure --frontend noninteractive tzdata \ | ||
| && cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| cuda-libraries-dev-${cuda_version} \ | ||
| cuda-nvcc-${cuda_version} \ | ||
| libhwloc-dev \ | ||
| autoconf \ | ||
| automake \ | ||
| libtool \ | ||
| libopenmpi-dev \ | ||
| git \ | ||
| curl \ | ||
| python3 \ | ||
| build-essential | ||
|
|
||
| # NCCL | ||
|
|
||
| ARG NCCL_VERSION=2.26.2-1 | ||
|
|
||
| RUN cd /tmp \ | ||
| && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \ | ||
| && cd nccl \ | ||
| && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} | ||
|
|
||
| # NCCL tests | ||
|
|
||
| RUN cd /opt \ | ||
| && git clone https://github.com/NVIDIA/nccl-tests \ | ||
| && cd nccl-tests \ | ||
| && make -j$(nproc) \ | ||
| MPI=1 \ | ||
| MPI_HOME=${OPEN_MPI_PATH} \ | ||
| CUDA_HOME=${CUDA_HOME} \ | ||
| NCCL_HOME=${NCCL_HOME} | ||
|
|
||
| # Final stage | ||
|
|
||
| INCLUDE+ base/Dockerfile.common | ||
|
|
||
| ENV NCCL_HOME=/opt/nccl | ||
|
|
||
| COPY --from=builder ${NCCL_HOME} ${NCCL_HOME} | ||
| COPY --from=builder /opt/nccl-tests/build /opt/nccl-tests/build | ||
|
|
||
| ARG FLAVOR | ||
| FROM nvidia/cuda:12.1.1-${FLAVOR}-ubuntu20.04 | ||
|
|
||
| ARG PYTHON | ||
| ARG _UV_HOME="/opt/uv" | ||
| ENV UV_PYTHON="${PYTHON}" | ||
| ENV UV_INSTALL_DIR="${_UV_HOME}/bin" | ||
| ENV UV_PYTHON_INSTALL_DIR="${_UV_HOME}/python" | ||
| ENV UV_PYTHON_BIN_DIR="${UV_PYTHON_INSTALL_DIR}/bin" | ||
| ENV UV_MANAGED_PYTHON=1 | ||
| ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 | ||
|
|
||
| ENV PATH="${UV_INSTALL_DIR}:${UV_PYTHON_BIN_DIR}:${PATH}" | ||
|
|
||
| RUN export DEBIAN_FRONTEND=noninteractive && \ | ||
| apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ | ||
| apt-get update --fix-missing && \ | ||
| apt-get upgrade -y && \ | ||
| ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime && \ | ||
| apt-get install -y tzdata && \ | ||
| dpkg-reconfigure --frontend noninteractive tzdata && \ | ||
| apt-get install -y bzip2 ca-certificates curl build-essential git libglib2.0-0 libsm6 libxext6 libxrender1 mercurial openssh-server subversion wget \ | ||
| libibverbs1 ibverbs-providers ibverbs-utils libibverbs-dev infiniband-diags && \ | ||
| sed -i "s/.*PasswordAuthentication.*/PasswordAuthentication no/g" /etc/ssh/sshd_config && mkdir /run/sshd && \ | ||
| mkdir ~/.ssh && chmod 700 ~/.ssh && touch ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && rm /etc/ssh/ssh_host_* | ||
|
|
||
| RUN curl -LsSf https://astral.sh/uv/install.sh | INSTALLER_NO_MODIFY_PATH=1 sh && \ | ||
| uv python install --preview --default | ||
|
|
||
| # MPI, NVCC, and /etc/ld.so.conf.d | ||
|
|
||
| RUN apt-get update \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| openmpi-bin \ | ||
| && if [ "$FLAVOR" = "devel" ]; then \ | ||
| cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \ | ||
| && apt-get install -y --no-install-recommends \ | ||
| cuda-libraries-dev-${cuda_version} \ | ||
| cuda-nvcc-${cuda_version} \ | ||
| libhwloc-dev; \ | ||
| fi \ | ||
| && rm -rf /var/lib/apt/lists/* \ | ||
| && echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \ | ||
| && ldconfig | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| ARG UBUNTU_VERSION | ||
|
|
||
| FROM nvidia/cuda:12.1.1-base-ubuntu${UBUNTU_VERSION}.04 | ||
|
|
||
| ARG _UV_HOME="/opt/uv" | ||
|
|
||
| ENV UV_INSTALL_DIR="${_UV_HOME}/bin" | ||
| ENV UV_MANAGED_PYTHON=1 | ||
| ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 | ||
|
|
||
| ENV PATH="${UV_INSTALL_DIR}:${PATH}" | ||
|
|
||
| ENV OMPI_MCA_pml=^cm,ucx | ||
| ENV OMPI_MCA_btl=tcp,self | ||
| ENV OMPI_MCA_btl_tcp_if_exclude=lo,docker0 | ||
| ENV NCCL_SOCKET_IFNAME=^docker,lo | ||
|
|
||
| RUN export DEBIAN_FRONTEND=noninteractive \ | ||
| && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}04/x86_64/3bf863cc.pub \ | ||
| && apt-get update --fix-missing \ | ||
| && apt-get upgrade -y \ | ||
| && ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime \ | ||
| && apt-get install -y tzdata \ | ||
| && dpkg-reconfigure --frontend noninteractive tzdata \ | ||
| && apt-get install -y bzip2 ca-certificates curl build-essential git libglib2.0-0 libsm6 libxext6 libxrender1 mercurial openssh-server subversion wget \ | ||
| libibverbs1 ibverbs-providers ibverbs-utils libibverbs-dev infiniband-diags \ | ||
| && rm -rf /var/lib/apt/lists/* \ | ||
| && sed -i "s/.*PasswordAuthentication.*/PasswordAuthentication no/g" /etc/ssh/sshd_config \ | ||
| && mkdir /run/sshd \ | ||
| && mkdir ~/.ssh && chmod 700 ~/.ssh && touch ~/.ssh/authorized_keys \ | ||
| && chmod 600 ~/.ssh/authorized_keys \ | ||
| && rm /etc/ssh/ssh_host_* | ||
|
|
||
| RUN curl -LsSf https://astral.sh/uv/install.sh | INSTALLER_NO_MODIFY_PATH=1 sh \ | ||
| && uv python install --preview --default |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,15 @@ | ||
| ARG BASE_IMAGE=dstackai/base:py3.12-0.7-cuda-12.1 | ||
| # syntax = edrevo/dockerfile-plus | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we live without it? It is an unfamiliar dependency that is no longer maintained.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Without this dependency, we would need to duplicate the code. |
||
|
|
||
| FROM ${BASE_IMAGE} | ||
| INCLUDE+ base/Dockerfile.common | ||
|
|
||
| ENV PREFIX=/usr/local | ||
| ENV CUDA_PATH=/usr/local/cuda | ||
| ENV NCCL_HOME=/usr/local | ||
| ENV CUDA_HOME=/usr/local/cuda | ||
| ENV LIBFABRIC_PATH=/opt/amazon/efa | ||
| ENV OPEN_MPI_PATH=/opt/amazon/openmpi | ||
| ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}" | ||
| ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}" | ||
|
|
||
| # prerequisites | ||
| # Prerequisites | ||
|
|
||
| RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \ | ||
| && apt-get update \ | ||
|
|
@@ -19,61 +19,58 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \ | |
| libhwloc-dev \ | ||
| autoconf \ | ||
| automake \ | ||
| libtool | ||
| libtool \ | ||
| && rm -rf /var/lib/apt/lists/* | ||
|
|
||
| # EFA | ||
|
|
||
| ARG EFA_VERSION=1.38.1 | ||
|
|
||
| RUN cd $HOME \ | ||
| RUN cd /tmp \ | ||
| && apt-get update \ | ||
| && curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \ | ||
| && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \ | ||
| && cd aws-efa-installer \ | ||
| && ./efa_installer.sh -y --skip-kmod -g | ||
| && ./efa_installer.sh -y --skip-kmod -g \ | ||
| && rm -rf /tmp/aws-efa-installer /var/lib/apt/lists/* | ||
|
|
||
| # NCCL | ||
|
|
||
| ARG NCCL_VERSION=2.26.2-1 | ||
|
|
||
| RUN cd $HOME \ | ||
| RUN cd /tmp \ | ||
| && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \ | ||
| && cd nccl \ | ||
| && make -j$(nproc) src.build BUILDDIR=${PREFIX} | ||
| && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \ | ||
| && rm -rf /tmp/nccl | ||
|
|
||
| # AWS OFI NCCL | ||
|
|
||
| ARG OFI_VERSION=1.14.0 | ||
|
|
||
| RUN cd $HOME \ | ||
| RUN cd /tmp \ | ||
| && git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \ | ||
| && cd aws-ofi-nccl \ | ||
| && ./autogen.sh \ | ||
| && ./configure \ | ||
| --with-cuda=${CUDA_PATH} \ | ||
| --with-cuda=${CUDA_HOME} \ | ||
| --with-libfabric=${LIBFABRIC_PATH} \ | ||
| --with-mpi=${OPEN_MPI_PATH} \ | ||
| --with-cuda=${CUDA_PATH} \ | ||
| --with-nccl=${PREFIX} \ | ||
| --with-cuda=${CUDA_HOME} \ | ||
| --with-nccl=${NCCL_HOME} \ | ||
| --disable-tests \ | ||
| --prefix=${PREFIX} \ | ||
| && make -j$(numproc) \ | ||
| && make install | ||
| --prefix=${NCCL_HOME} \ | ||
| && make -j$(nproc) \ | ||
| && make install \ | ||
| && rm -rf /tmp/aws-ofi-nccl /var/lib/apt/lists/* | ||
|
|
||
| # NCCL Tests | ||
|
|
||
| RUN cd $HOME \ | ||
| RUN cd /opt \ | ||
| && git clone https://github.com/NVIDIA/nccl-tests \ | ||
| && cd nccl-tests \ | ||
| && make -j$(numproc) \ | ||
| && make -j$(nproc) \ | ||
| MPI=1 \ | ||
| MPI_HOME=${OPEN_MPI_PATH} \ | ||
| CUDA_HOME=${CUDA_PATH} \ | ||
| NCCL_HOME=${PREFIX} | ||
|
|
||
| ARG BUILD_DATE | ||
| ARG IMAGE_NAME | ||
| ARG DSTACK_REVISION | ||
|
|
||
| LABEL org.opencontainers.image.title="${IMAGE_NAME}" | ||
| LABEL org.opencontainers.image.version="${EFA_VERSION}-${DSTACK_REVISION}" | ||
| LABEL org.opencontainers.image.created="${BUILD_DATE}" | ||
| CUDA_HOME=${CUDA_HOME} \ | ||
| NCCL_HOME=${NCCL_HOME} | ||
Uh oh!
There was an error while loading. Please reload this page.