33# Build stage
44FROM nvidia/cuda:12.1.1-base-ubuntu20.04 AS builder
55
# Pinned component versions. EFA_VERSION is interpolated into the installer
# download URL, and OFI_VERSION / NCCL_VERSION into the git tags cloned by the
# RUN steps later in this stage.
6+ ARG NCCL_VERSION=2.26.2-1
7+ ARG EFA_VERSION=1.38.1
8+ ARG OFI_VERSION=1.14.0
9+
# Locations referenced by the configure and make invocations in this stage.
610ENV NCCL_HOME=/opt/nccl
7- ENV CUDA_PATH =/usr/local/cuda
# ENV must be written as key=value with no whitespace around '='; with a space
# the variable is not defined as intended and ${CUDA_HOME} is wrong downstream.
11+ ENV CUDA_HOME=/usr/local/cuda
812ENV LIBFABRIC_PATH=/opt/amazon/efa
913ENV OPEN_MPI_PATH=/opt/amazon/openmpi
1014ENV NCCL_TESTS_HOME=/opt/nccl-tests
11- ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
12- ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1315
1416# Install build dependencies
1517RUN export DEBIAN_FRONTEND=noninteractive \
@@ -33,40 +35,18 @@ RUN export DEBIAN_FRONTEND=noninteractive \
3335 python3 \
3436 build-essential
3537
36- # EFA
37-
38- ARG EFA_VERSION=1.38.1
39-
# Download the aws-efa-installer tarball for EFA_VERSION into /tmp, unpack it,
# and run efa_installer.sh from the extracted directory.
# NOTE(review): -y presumably answers prompts automatically, --skip-kmod
# presumably skips the kernel module, and -g presumably enables GPU support;
# confirm against the installer's own help output.
40- RUN cd $HOME \
38+ RUN cd /tmp \
4139 && curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
4240 && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
4341 && cd aws-efa-installer \
4442 && ./efa_installer.sh -y --skip-kmod -g
4543
46- # NCCL
47-
48- ARG NCCL_VERSION=2.26.2-1
49-
50- # Build NCCL tests
51- RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
52- && cd ${NCCL_TESTS_HOME} \
53- && make -j$(nproc) \
54- MPI=1 \
55- MPI_HOME=${OPEN_MPI_PATH} \
56- CUDA_HOME=${CUDA_HOME} \
57- NCCL_HOME=${NCCL_HOME}
58-
59-
60- # AWS OFI NCCL
61-
62- ARG OFI_VERSION=1.14.0
63-
# Build and install the aws-ofi-nccl plugin: clone tag v${OFI_VERSION} into
# /tmp, generate the configure script, point configure at the CUDA, libfabric,
# MPI and NCCL locations defined by the ENV lines of this stage, then make and
# make install.
# Fixes: the job count uses `nproc` (there is no `numproc` command, so the
# substitution was empty), and `${CUDA_HOME}` has no stray space inside the
# braces (the shell rejects `${CUDA_HOME }` as a bad substitution).
64- RUN cd $HOME \
44+ RUN cd /tmp \
6545 && git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
6646 && cd aws-ofi-nccl \
6747 && ./autogen.sh \
6848 && ./configure \
69- --with-cuda=${CUDA_PATH } \
49+ --with-cuda=${CUDA_HOME} \
7050 --with-libfabric=${LIBFABRIC_PATH} \
7151 --with-mpi=${OPEN_MPI_PATH} \
7252 --with-nccl=${NCCL_HOME} \
@@ -75,15 +55,19 @@ RUN cd $HOME \
7555 && make -j$(nproc) \
7656 && make install
7757
78- # NCCL Tests
58+ # Build NCCL
# Clone the NCCL tag v${NCCL_VERSION} into /tmp and run the src.build make
# target with one job per CPU reported by nproc, passing BUILDDIR=${NCCL_HOME}.
# NOTE(review): the aws-ofi-nccl configure step earlier in this file is passed
# --with-nccl=${NCCL_HOME} but runs before this build; confirm that ordering
# is intended.
59+ RUN cd /tmp \
60+ && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
61+ && cd nccl \
62+ && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
7963
80- RUN cd $HOME \
81- && git clone https://github.com/NVIDIA/nccl-tests \
82- && cd nccl-tests \
83- && make -j$(numproc ) \
64+ # Build NCCL tests
# Clone nccl-tests directly into ${NCCL_TESTS_HOME} and build it there with MPI
# enabled (MPI=1), passing the MPI, CUDA and NCCL locations to make.
# Fixes: `${CUDA_HOME}` has no stray space inside the braces (the shell rejects
# `${CUDA_HOME }` as a bad substitution), and `$(nproc)` matches the spelling
# used by the NCCL build step.
65+ RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
66+ && cd ${NCCL_TESTS_HOME} \
67+ && make -j$(nproc) \
8468 MPI=1 \
8569 MPI_HOME=${OPEN_MPI_PATH} \
86- CUDA_HOME=${CUDA_PATH } \
70+ CUDA_HOME=${CUDA_HOME} \
8771 NCCL_HOME=${NCCL_HOME}
8872
8973# Final stage
0 commit comments