Skip to content

Commit 6225135

Browse files
[UX] Pre-build a EFA version of the default Docker image #2793
1 parent 0e36c85 commit 6225135

File tree

1 file changed

+40
-27
lines changed

1 file changed

+40
-27
lines changed

docker/base/efa.Dockerfile

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,57 +2,70 @@
22

33
INCLUDE+ base/Dockerfile.common
44
# Install locations referenced by the build steps below; the EFA libfabric and Open MPI bin directories are prepended to PATH.
5-
ENV NCCL_HOME=/usr/local
6-
ENV CUDA_HOME=/usr/local/cuda
5+
ENV PREFIX=/usr/local
6+
ENV CUDA_PATH=/usr/local/cuda
77
ENV LIBFABRIC_PATH=/opt/amazon/efa
88
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
9-
ENV NCCL_TESTS_HOME=/opt/nccl-tests
109
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
10+
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1111

12-
ARG EFA_VERSION=1.38.1
13-
ARG NCCL_VERSION=2.26.2-1
14-
ARG OFI_VERSION=1.14.0
15-
ARG FLAVOR
12+
# Prerequisites: CUDA dev libraries and nvcc for the major-minor of CUDA_VERSION (e.g. 12.4.1 -> 12-4), plus hwloc headers and autotools.
1613

17-
RUN apt-get update \
18-
&& cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
14+
RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
15+
&& apt-get update \
1916
&& apt-get install -y --no-install-recommends \
2017
cuda-libraries-dev-${cuda_version} \
2118
cuda-nvcc-${cuda_version} \
2219
libhwloc-dev \
2320
autoconf \
2421
automake \
25-
libtool \
26-
&& cd $HOME \
22+
libtool
23+
24+
# EFA: download the aws-efa-installer tarball for EFA_VERSION into $HOME, unpack it, and run efa_installer.sh with -y --skip-kmod -g.
25+
26+
ARG EFA_VERSION=1.38.1
27+
28+
RUN cd $HOME \
2729
&& curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
2830
&& tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
2931
&& cd aws-efa-installer \
30-
&& ./efa_installer.sh -y --skip-kmod -g \
31-
&& cd $HOME \
32+
&& ./efa_installer.sh -y --skip-kmod -g
33+
34+
# NCCL: clone NVIDIA/nccl at tag v${NCCL_VERSION} into $HOME and run the src.build target in parallel with the build directory set to the install prefix.
35+
36+
ARG NCCL_VERSION=2.26.2-1
37+
38+
RUN cd $HOME \
3239
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
3340
&& cd nccl \
34-
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
35-
&& cd $HOME \
41+
&& make -j$(nproc) src.build BUILDDIR=${PREFIX}
42+
43+
# AWS OFI NCCL: clone aws-ofi-nccl at v${OFI_VERSION}, configure it against CUDA, the EFA libfabric, Open MPI and NCCL, then build with one job per CPU (nproc) and install.
44+
45+
ARG OFI_VERSION=1.14.0
46+
47+
RUN cd $HOME \
3648
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
3749
&& cd aws-ofi-nccl \
3850
&& ./autogen.sh \
3951
&& ./configure \
40-
--with-cuda=${CUDA_HOME} \
52+
--with-cuda=${CUDA_PATH} \
4153
--with-libfabric=${LIBFABRIC_PATH} \
4254
--with-mpi=${OPEN_MPI_PATH} \
43-
--with-nccl=${NCCL_HOME} \
55+
--with-cuda=${CUDA_PATH} \
56+
--with-nccl=${PREFIX} \
4457
--disable-tests \
45-
--prefix=${NCCL_HOME} \
58+
--prefix=${PREFIX} \
4659
&& make -j$(nproc) \
47-
&& make install \
48-
&& git clone https://github.com/NVIDIA/nccl-tests ${HOME}/nccl-tests \
49-
&& cd ${HOME}/nccl-tests \
60+
&& make install
61+
62+
# NCCL Tests: clone NVIDIA/nccl-tests into $HOME and build it with MPI support, one job per CPU (nproc), against Open MPI, CUDA and the NCCL install prefix.
63+
64+
RUN cd $HOME \
65+
&& git clone https://github.com/NVIDIA/nccl-tests \
66+
&& cd nccl-tests \
5067
&& make -j$(nproc) \
5168
MPI=1 \
5269
MPI_HOME=${OPEN_MPI_PATH} \
53-
CUDA_HOME=${CUDA_HOME} \
54-
NCCL_HOME=${NCCL_HOME} \
55-
&& ln -s ${HOME}/nccl-tests/build ${NCCL_TESTS_HOME} \
56-
&& echo "${OPEN_MPI_PATH}/lib" >> /etc/ld.so.conf.d/openmpi.conf \
57-
&& echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
58-
&& ldconfig
70+
CUDA_HOME=${CUDA_PATH} \
71+
NCCL_HOME=${PREFIX}

0 commit comments

Comments
 (0)