22# SPDX-License-Identifier: MIT-0
33FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
44
5- ARG GDRCOPY_VERSION=2.4.1
5+ ARG GDRCOPY_VERSION=v2.4.1
66ARG EFA_INSTALLER_VERSION=1.31.0
7- ARG AWS_OFI_NCCL_VERSION=1.8.1
8- ARG NCCL_VERSION=2.20.3
9- ARG NCCL_TESTS_VERSION=2.13.9
7+ ARG AWS_OFI_NCCL_VERSION=v1.8.1-aws
8+ ARG NCCL_VERSION=v2.20.3-1
9+ ARG NCCL_TESTS_VERSION=v2.13.9
1010
1111RUN apt-get update -y
1212RUN apt-get remove -y --allow-change-held-packages \
@@ -60,7 +60,7 @@ RUN curl https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
6060
6161# ################################################
6262# # Install NVIDIA GDRCopy
63- RUN git clone -b v${GDRCOPY_VERSION} https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
63+ RUN git clone -b ${GDRCOPY_VERSION} https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
6464 && cd /tmp/gdrcopy \
6565 && make prefix=/opt/gdrcopy install
6666
@@ -80,17 +80,17 @@ RUN cd $HOME \
8080
8181# ##################################################
8282# # Install NCCL
83- RUN git clone -b v${NCCL_VERSION}-1 https://github.com/NVIDIA/nccl.git /opt/nccl \
83+ RUN git clone -b ${NCCL_VERSION} https://github.com/NVIDIA/nccl.git /opt/nccl \
8484 && cd /opt/nccl \
8585 && make -j $(nproc) src.build CUDA_HOME=/usr/local/cuda \
8686 NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_90,code=sm_90"
8787
8888# ##################################################
8989# # Install AWS-OFI-NCCL plugin
9090RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libhwloc-dev
91- RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/v${AWS_OFI_NCCL_VERSION}-aws/aws-ofi-nccl-${AWS_OFI_NCCL_VERSION}-aws.tar.gz \
92- && tar -xf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION}-aws.tar.gz \
93- && cd aws-ofi-nccl-${AWS_OFI_NCCL_VERSION}-aws \
91+ RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/${AWS_OFI_NCCL_VERSION}/aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
92+ && tar -xf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
93+ && cd aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
9494 && ./configure --prefix=/opt/aws-ofi-nccl/install \
9595 --with-mpi=/opt/amazon/openmpi \
9696 --with-libfabric=/opt/amazon/efa \
@@ -99,12 +99,12 @@ RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/v${AWS_OFI_NC
9999 && make -j $(nproc) \
100100 && make install \
101101 && cd .. \
102- && rm -rf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION}-aws \
103- && rm aws-ofi-nccl-${AWS_OFI_NCCL_VERSION}-aws.tar.gz
102+ && rm -rf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
103+ && rm aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz
104104
105105# ##################################################
106106# # Install NCCL-tests
107- RUN git clone -b v${NCCL_TESTS_VERSION} https://github.com/NVIDIA/nccl-tests.git /opt/nccl-tests \
107+ RUN git clone -b ${NCCL_TESTS_VERSION} https://github.com/NVIDIA/nccl-tests.git /opt/nccl-tests \
108108 && cd /opt/nccl-tests \
109109 && make -j $(nproc) \
110110 MPI=1 \
0 commit comments