Skip to content

Commit bcfc856

Browse files
[UX] Pre-build an EFA version of the default Docker image #2793
1 parent dbb3545 commit bcfc856

File tree

1 file changed

+18
-34
lines changed

1 file changed

+18
-34
lines changed

docker/base/base-efa.Dockerfile

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
# Build stage
44
FROM nvidia/cuda:12.1.1-base-ubuntu20.04 AS builder
55

6+
ARG NCCL_VERSION=2.26.2-1
7+
ARG EFA_VERSION=1.38.1
8+
ARG OFI_VERSION=1.14.0
9+
610
ENV NCCL_HOME=/opt/nccl
7-
ENV CUDA_PATH=/usr/local/cuda
11+
ENV CUDA_HOME=/usr/local/cuda
812
ENV LIBFABRIC_PATH=/opt/amazon/efa
913
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
1014
ENV NCCL_TESTS_HOME=/opt/nccl-tests
11-
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
12-
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1315

1416
# Install build dependencies
1517
RUN export DEBIAN_FRONTEND=noninteractive \
@@ -33,40 +35,18 @@ RUN export DEBIAN_FRONTEND=noninteractive \
3335
python3 \
3436
build-essential
3537

36-
# EFA
37-
38-
ARG EFA_VERSION=1.38.1
39-
40-
RUN cd $HOME \
38+
RUN cd /tmp \
4139
&& curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
4240
&& tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
4341
&& cd aws-efa-installer \
4442
&& ./efa_installer.sh -y --skip-kmod -g
4543

46-
# NCCL
47-
48-
ARG NCCL_VERSION=2.26.2-1
49-
50-
# Build NCCL tests
51-
RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
52-
&& cd ${NCCL_TESTS_HOME} \
53-
&& make -j$(nproc) \
54-
MPI=1 \
55-
MPI_HOME=${OPEN_MPI_PATH} \
56-
CUDA_HOME=${CUDA_HOME} \
57-
NCCL_HOME=${NCCL_HOME}
58-
59-
60-
# AWS OFI NCCL
61-
62-
ARG OFI_VERSION=1.14.0
63-
64-
RUN cd $HOME \
44+
RUN cd /tmp \
6545
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
6646
&& cd aws-ofi-nccl \
6747
&& ./autogen.sh \
6848
&& ./configure \
69-
--with-cuda=${CUDA_PATH} \
49+
--with-cuda=${CUDA_HOME} \
7050
--with-libfabric=${LIBFABRIC_PATH} \
7151
--with-mpi=${OPEN_MPI_PATH} \
7252
--with-nccl=${NCCL_HOME} \
@@ -75,15 +55,19 @@ RUN cd $HOME \
7555
&& make -j$(nproc) \
7656
&& make install
7757

78-
# NCCL Tests
58+
# Build NCCL
59+
RUN cd /tmp \
60+
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
61+
&& cd nccl \
62+
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
7963

80-
RUN cd $HOME \
81-
&& git clone https://github.com/NVIDIA/nccl-tests \
82-
&& cd nccl-tests \
83-
&& make -j$(numproc) \
64+
# Build NCCL tests
65+
RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
66+
&& cd ${NCCL_TESTS_HOME} \
67+
&& make -j$(nproc) \
8468
MPI=1 \
8569
MPI_HOME=${OPEN_MPI_PATH} \
86-
CUDA_HOME=${CUDA_PATH} \
70+
CUDA_HOME=${CUDA_HOME} \
8771
NCCL_HOME=${NCCL_HOME}
8872

8973
# Final stage

0 commit comments

Comments
 (0)