Skip to content

Commit

Permalink
Merge pull request #28647 from billysuh7/topic/bsuh/libnvshmem
Browse files Browse the repository at this point in the history
nvshmem 3.1.7
  • Loading branch information
carterbox authored Feb 13, 2025
2 parents 90a4e75 + 392aeda commit 6ae3404
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 0 deletions.
15 changes: 15 additions & 0 deletions recipes/libnvshmem/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# conda-build install script for the prebuilt NVSHMEM archive.
# Runs from the root of the unpacked archive and copies its bin/, include/,
# lib/ and share/ content into the install prefix.
#
# Required environment:
#   PREFIX - install prefix to copy into

set -e

# Abort early when PREFIX is missing; otherwise the copies below would
# target "/lib", "/bin", ... on the build host.
: "${PREFIX:?PREFIX must be set}"

# NOTE(review): check-glibc presumably comes from cf-nvidia-tools and inspects
# the glibc requirements of the listed binaries - confirm.
check-glibc bin/* lib/* bin/examples/* bin/perftest/device/coll/* bin/perftest/device/pt-to-pt/* bin/perftest/host/coll/* bin/perftest/host/init/* bin/perftest/host/pt-to-pt/*

mkdir -p "${PREFIX}/lib/"

cp -rv bin "${PREFIX}/"
cp -rv include "${PREFIX}/"
cp -rv lib/cmake "${PREFIX}/lib/"
# Only the NVSHMEM libraries are copied from lib/, not everything in it.
cp -rv lib/*nvshmem*.so* "${PREFIX}/lib"
cp -rv lib/*nvshmem*.a "${PREFIX}/lib"
cp -rv share/ "${PREFIX}/"

44 changes: 44 additions & 0 deletions recipes/libnvshmem/compile_perf_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

#!/bin/bash
#
# Package test helper: configures and builds the NVSHMEM perftest sources
# found under $PREFIX/share/src/perftest, using the current directory as the
# CMake build tree.
#
# Environment read by this script:
#   target_platform, arm_variant_type - select the CUDA targets directory
#   CONDA_PREFIX                      - searched for the cc1plus binary
#   PREFIX                            - location of nvcc, headers and NVSHMEM
#   CC, CXX, CXXFLAGS                 - host compilers and flags

set -ex

: "${PREFIX:?PREFIX must be set}"
: "${CONDA_PREFIX:?CONDA_PREFIX must be set}"

# GPU Arch - anything recent should do but change accordingly if build breaks
SM=89

# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html?highlight=tegra#cross-compilation
if [[ "${target_platform}" == "linux-64" ]]; then
  targetsDir="targets/x86_64-linux"
elif [[ "${target_platform}" == "linux-aarch64" && "${arm_variant_type}" == "sbsa" ]]; then
  targetsDir="targets/sbsa-linux"
elif [[ "${target_platform}" == "linux-aarch64" && "${arm_variant_type}" == "tegra" ]]; then
  targetsDir="targets/aarch64-linux"
fi

# Kept as a separate check so a targetsDir inherited from the environment is
# still accepted when no platform branch matched.
if [[ -z "${targetsDir+x}" ]]; then
  echo "target_platform: ${target_platform} is unknown! targetsDir must be defined!" >&2
  exit 1
fi

# E.g. $CONDA_PREFIX/libexec/gcc/x86_64-conda-linux-gnu/13.3.0/cc1plus
# List every match for the build log.
find "${CONDA_PREFIX}" -name cc1plus

# Use the first match only: several matches would otherwise put a
# multi-line value into PATH and the CMake arguments.
cc1plus_path=$(find "${CONDA_PREFIX}" -name cc1plus -print -quit)
if [[ -z "${cc1plus_path}" ]]; then
  echo "cc1plus not found under CONDA_PREFIX: ${CONDA_PREFIX}" >&2
  exit 1
fi
GCC_DIR=$(dirname "${cc1plus_path}")

export PATH="${GCC_DIR}:${PATH}"
# Append the previous value only when it is non-empty; a trailing ':' would
# add the current directory to the loader search path.
export LD_LIBRARY_PATH="${GCC_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# No need for use-linker-plugin optimization, causes compile failure, don't use it for the test
export CXXFLAGS="${CXXFLAGS} -fno-use-linker-plugin"

echo "CC = ${CC}"
echo "CXX = ${CXX}"

cmake -S "${PREFIX}/share/src/perftest" \
  -DCMAKE_LIBRARY_PATH="${GCC_DIR}" \
  -DCMAKE_C_COMPILER="${CC}" \
  -DCMAKE_CUDA_COMPILER="${PREFIX}/bin/nvcc" \
  -DCMAKE_CXX_COMPILER="${CXX}" \
  -DCUDAToolkit_INCLUDE_DIRECTORIES="${PREFIX}/include;${PREFIX}/${targetsDir}/include" \
  -DNVSHMEM_MPI_SUPPORT=0 \
  -DNVSHMEM_PREFIX="${PREFIX}" \
  -DCUDA_HOME="${PREFIX}" \
  -DCMAKE_CUDA_ARCHITECTURES="${SM}"

cmake --build .
2 changes: 2 additions & 0 deletions recipes/libnvshmem/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# C standard library version used for Linux builds of this recipe.
# NOTE(review): presumably the minimum glibc the binaries are checked against - confirm.
c_stdlib_version: # [linux]
- 2.28 # [linux]
223 changes: 223 additions & 0 deletions recipes/libnvshmem/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
# Upstream NVSHMEM release packaged by this recipe.
{% set version = "3.1.7" %}
# Platform component of the download URL; the selectors keep one line per architecture.
{% set platform = "linux-x86_64" %} # [linux64]
{% set platform = "linux-sbsa" %} # [aarch64]
{% set extension = "tar.xz" %}

# First component of the version; used in the runtime output name and in the test paths.
{% set soname = version.split(".")[0] %}
# Major part of cuda_compiler_version from the environment, falling back to 12.6 when it is not set.
{% set cuda_major = environ.get("cuda_compiler_version", "12.6").split(".")[0] %}

# Archive checksum per platform and CUDA major version.
# There is no entry for aarch64 with CUDA 11; that combination is skipped in the build section.
{% set sha256 = "4e03e40d16770a5bdbcefeb7f18579bdfc07aaa7b09a4825a7ab9b5122f69567" %} # [linux64 and (cuda_compiler_version or "").startswith("11")]
{% set sha256 = "128c62dd19926f58d1ff69290917dd50f3fb1690567f52d3965cd8fbfb9b2412" %} # [linux64 and (cuda_compiler_version or "").startswith("12")]
{% set sha256 = "03e4404e888609bc98f496453896acc34c9ca7bbc39cc32b41d677bcaab71c49" %} # [aarch64 and (cuda_compiler_version or "").startswith("12")]

# Top-level recipe name; the installable packages are declared under outputs.
package:
name: libnvshmem-split
version: {{ version }}

# Prebuilt archive chosen by platform, version and CUDA major version.
source:
url: https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/{{ platform }}/libnvshmem-{{ platform }}-{{ version }}_cuda{{ cuda_major }}-archive.{{ extension }}
sha256: {{ sha256 }}

build:
number: 0
# Built only on linux64 and aarch64.
skip: true # [not (linux64 or aarch64)]
# Skipped without a CUDA compiler, and on aarch64 with CUDA 11.
skip: true # [cuda_compiler_version in (None, "None") or (aarch64 and (cuda_compiler_version or "").startswith("11"))]

requirements:
build:
# NOTE(review): presumably provides the check-glibc command called by build.sh - confirm.
- cf-nvidia-tools 1 # [linux]

outputs:
# Runtime output: contains only the versioned NVSHMEM shared libraries.
- name: libnvshmem{{ soname }}
build:
ignore_run_exports_from: # [(cuda_compiler_version or "").startswith("12")]
- {{ compiler('cuda') }} # [(cuda_compiler_version or "").startswith("12")]
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")]
# Libraries the shipped binaries link against that are deliberately not declared as dependencies.
missing_dso_whitelist:
- "*libcuda.so*" # driver
- "*openmpi*" # omitted as dependency - customer HPC systems have own versions of MPI installed already
- "*libfabric*" # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin
- "*libfabric1*" # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin
- "*libmlx5*" # omitted as dependency - needed for ibgda and ibdevx transport, installed as part of the Mellanox OFED
- "*libmpi*" # omitted as dependency - part of openmpi
- "*libpmix*" # omitted as dependency - part of openmpi
- "*liboshmem*" # omitted as dependency - part of openmpi
- "*rdma-core*" # omitted as dependency - MOFED replaces rdma-core
# Versioned shared objects only; the unversioned .so names are listed in the libnvshmem-dev output.
files:
- lib/*nvshmem*.so.*
requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('cuda') }}
- {{ stdlib("c") }}
- arm-variant * {{ arm_variant_type }} # [aarch64]
host:
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"]
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")]
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")]
- ucx
run:
- {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
- cuda-cudart # [(cuda_compiler_version or "").startswith("12")]
- libpciaccess
- libpmix
- nccl
- pmix-bin
- ucx
run_constrained:
- arm-variant * {{ arm_variant_type }} # [aarch64]
# Each library is checked twice: the soname entry must be a symlink (-L)
# and the fully versioned name must resolve to a regular file (-f).
# libnvshmem_host uses the full package version as its suffix; the bootstrap
# and transport plugins use the soname followed by .0.0.
test:
commands:
- test -L $PREFIX/lib/libnvshmem_host.so.{{ soname }}
- test -f $PREFIX/lib/libnvshmem_host.so.{{ version }}
- test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}.0.0
- test -L $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}
- test -f $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}.0.0

# Development output: binaries, headers, CMake package files, unversioned .so names and the sources under share/src.
- name: libnvshmem-dev
build:
ignore_run_exports_from: # [(cuda_compiler_version or "").startswith("12")]
- {{ compiler('cuda') }} # [(cuda_compiler_version or "").startswith("12")]
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")]
missing_dso_whitelist:
- "*libcuda.so*" # driver
- "*libnvshmem_host.so.{{ soname }}" # avoids: ERROR (libnvshmem-dev,bin/perftest/device/pt-to-pt/shmem_put_bw): lib/libnvshmem_host.so.3 not found in any packages. We DO test for its existence though.
- "*libmpi*"
- "*libpmix*"
- "*liboshmem*"
# Exports the runtime output as a run dependency, pinned with max_pin=None.
run_exports:
- {{ pin_subpackage("libnvshmem" ~ soname, max_pin=None) }}
files:
- bin/
- include/nvshmem*.h
- include/bootstrap_device_host
- include/device
- include/device_host
- include/device_host_transport
- include/host
- include/non_abi
- lib/cmake
- lib/*nvshmem*.so
- share/src
requirements:
build:
- {{ compiler("c") }}
- {{ compiler('cuda') }}
- {{ compiler("cxx") }}
- {{ stdlib("c") }}
- arm-variant * {{ arm_variant_type }} # [aarch64]
host:
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"]
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")]
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")]
- libpciaccess
run:
- {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
# Requires the exact runtime output produced by the same build.
- {{ pin_subpackage("libnvshmem" ~ soname, exact=True) }}
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")]
- cuda-cudart # [(cuda_compiler_version or "").startswith("12")]
run_constrained:
- arm-variant * {{ arm_variant_type }} # [aarch64]
# Spot checks: a sample of launcher, example and perftest binaries, one header per
# include directory, the CMake package files, and the unversioned .so symlinks.
test:
commands:
- test -f $PREFIX/bin/hydra_pmi_proxy
- test -f $PREFIX/bin/nvshmem-info
- test -f $PREFIX/bin/nvshmrun.hydra
- test -f $PREFIX/bin/hydra_nameserver
- test -f $PREFIX/bin/nvshmrun
- test -f $PREFIX/bin/hydra_persist
- test -f $PREFIX/bin/examples/collective-launch
- test -f $PREFIX/bin/examples/on-stream
- test -f $PREFIX/bin/perftest/device/pt-to-pt/shmem_p_latency
- test -f $PREFIX/bin/perftest/device/coll/reduction_latency
- test -f $PREFIX/bin/perftest/host/init/malloc
- test -f $PREFIX/bin/perftest/host/pt-to-pt/bw
- test -f $PREFIX/bin/perftest/host/coll/broadcast_on_stream
- test -f $PREFIX/include/bootstrap_device_host/nvshmem_uniqueid.h
- test -f $PREFIX/include/device/nvshmem_defines.h
- test -f $PREFIX/include/device_host/nvshmem_types.h
- test -f $PREFIX/include/host/nvshmem_api.h
- test -f $PREFIX/include/non_abi/device/coll/defines.cuh
- test -f $PREFIX/include/device_host_transport/nvshmem_constants.h
- test -f $PREFIX/include/nvshmem.h
- test -f $PREFIX/include/nvshmemx.h
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMConfig.cmake
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets-release.cmake
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets.cmake
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets-release.cmake
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets.cmake
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMVersion.cmake
- test -L $PREFIX/lib/libnvshmem_host.so
- test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so
- test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so
- test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so
- test -L $PREFIX/lib/nvshmem_bootstrap_uid.so
- test -L $PREFIX/lib/nvshmem_transport_ibdevx.so
- test -L $PREFIX/lib/nvshmem_transport_ibgda.so
- test -L $PREFIX/lib/nvshmem_transport_ibrc.so
- test -L $PREFIX/lib/nvshmem_transport_libfabric.so
- test -L $PREFIX/lib/nvshmem_transport_ucx.so

# Static output: only the libnvshmem .a archives; depends on the exact libnvshmem-dev build.
- name: libnvshmem-static
files:
- lib/libnvshmem*.a
requirements:
run:
- {{ pin_subpackage("libnvshmem-dev", exact=True) }}
run_constrained:
- arm-variant * {{ arm_variant_type }} # [aarch64]
test:
# NOTE(review): confirm that conda-build honours a skip key inside the test section.
skip: true # [not linux64]
# Toolchain needed by compile_perf_test.sh to configure and build the perftest sources.
requires:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ stdlib("c") }}
- cmake
- make
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"]
- {{ compiler('cuda') }}
- openmpi
files:
- compile_perf_test.sh
# Check that both archives exist, then run the perftest build script.
commands:
- test -f $PREFIX/lib/libnvshmem_device.a
- test -f $PREFIX/lib/libnvshmem.a
- bash compile_perf_test.sh

# Project metadata: homepage, license information, summary and documentation links.
about:
home: https://docs.nvidia.com/nvshmem/index.html
license: LicenseRef-NVIDIA-End-User-License-Agreement
license_file: LICENSE
license_url: https://docs.nvidia.com/nvshmem/api/sla.html
summary: NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
description: |
NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
NVSHMEM can significantly reduce communication and coordination overheads by allowing programmers to perform these operations from within CUDA kernels and on CUDA streams.
doc_url: https://docs.nvidia.com/nvshmem/api/index.html

# The feedstock is named libnvshmem, which differs from the top-level package name libnvshmem-split.
extra:
feedstock-name: libnvshmem
recipe-maintainers:
- conda-forge/cuda

0 comments on commit 6ae3404

Please sign in to comment.