-
-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28647 from billysuh7/topic/bsuh/libnvshmem
nvshmem 3.1.7
- Loading branch information
Showing
4 changed files
with
284 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash | ||
# Copies the bin, include, lib and share trees found in the current working | ||
# directory into $PREFIX. Only the cmake directory and the *nvshmem* shared | ||
# and static libraries are taken from lib/. | ||
|
||
set -e | ||
|
||
# Abort with a clear message when PREFIX is unset or empty; without this the | ||
# copies below would be directed at the filesystem root. | ||
: "${PREFIX:?PREFIX must be set}" | ||
|
||
# check-glibc is an external command (not defined in this script); it is run | ||
# over the shipped binaries and libraries before anything is installed. | ||
# The globs are intentionally unquoted so that they expand to the file lists. | ||
check-glibc bin/* lib/* bin/examples/* bin/perftest/device/coll/* bin/perftest/device/pt-to-pt/* bin/perftest/host/coll/* bin/perftest/host/init/* bin/perftest/host/pt-to-pt/* | ||
|
||
mkdir -p "$PREFIX/lib/" | ||
|
||
cp -rv bin "$PREFIX/" | ||
cp -rv include "$PREFIX/" | ||
cp -rv lib/cmake "$PREFIX/lib/" | ||
cp -rv lib/*nvshmem*.so* "$PREFIX/lib" | ||
cp -rv lib/*nvshmem*.a "$PREFIX/lib" | ||
cp -rv share/ "$PREFIX/" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash | ||
# Configures and builds the perftest sources located under | ||
# $PREFIX/share/src/perftest with $CC, $CXX and $PREFIX/bin/nvcc. | ||
# Reads: target_platform, arm_variant_type, CONDA_PREFIX, PREFIX, CC, CXX, | ||
# CXXFLAGS, PATH, LD_LIBRARY_PATH. | ||
|
||
set -ex | ||
|
||
# Fail early with a clear message if either prefix is unset or empty, rather | ||
# than searching or configuring against unintended paths. | ||
: "${CONDA_PREFIX:?CONDA_PREFIX must be set}" | ||
: "${PREFIX:?PREFIX must be set}" | ||
|
||
#GPU Arch - anything recent should do but change accordingly if build breaks | ||
SM=89 | ||
|
||
[[ ${target_platform} == "linux-64" ]] && targetsDir="targets/x86_64-linux" | ||
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html?highlight=tegra#cross-compilation | ||
[[ ${target_platform} == "linux-aarch64" && ${arm_variant_type} == "sbsa" ]] && targetsDir="targets/sbsa-linux" | ||
[[ ${target_platform} == "linux-aarch64" && ${arm_variant_type} == "tegra" ]] && targetsDir="targets/aarch64-linux" | ||
|
||
# None of the platform checks above matched, so targetsDir was never assigned. | ||
if [ -z "${targetsDir+x}" ]; then | ||
echo "target_platform: ${target_platform} is unknown! targetsDir must be defined!" >&2 | ||
exit 1 | ||
fi | ||
|
||
# E.g. $CONDA_PREFIX/libexec/gcc/x86_64-conda-linux-gnu/13.3.0/cc1plus | ||
# Log every match for diagnostics. | ||
find "$CONDA_PREFIX" -name cc1plus | ||
|
||
# Use only the first match so GCC_DIR is always a single directory, and stop | ||
# with an explicit error when cc1plus is missing. | ||
cc1plus_path=$(find "$CONDA_PREFIX" -name cc1plus -print -quit) | ||
if [[ -z "${cc1plus_path}" ]]; then | ||
echo "cc1plus not found under CONDA_PREFIX: ${CONDA_PREFIX}" >&2 | ||
exit 1 | ||
fi | ||
GCC_DIR=$(dirname "${cc1plus_path}") | ||
|
||
export PATH="${GCC_DIR}:${PATH}" | ||
# Append the previous value only when it is non-empty; a trailing ':' would add | ||
# an empty entry, which the dynamic loader treats as the current directory. | ||
export LD_LIBRARY_PATH="${GCC_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" | ||
|
||
# No need for use-linker-plugin optimization, causes compile failure, don't use it for the test | ||
export CXXFLAGS="${CXXFLAGS} -fno-use-linker-plugin" | ||
|
||
echo "CC = $CC" | ||
echo "CXX = $CXX" | ||
|
||
# No -B is given, so the build tree is generated in the current directory, | ||
# which is what the final "cmake --build ." expects. | ||
cmake -S "$PREFIX/share/src/perftest" \ | ||
  -DCMAKE_LIBRARY_PATH="${GCC_DIR}" \ | ||
  -DCMAKE_C_COMPILER="$CC" \ | ||
  -DCMAKE_CUDA_COMPILER="$PREFIX/bin/nvcc" \ | ||
  -DCMAKE_CXX_COMPILER="$CXX" \ | ||
  -DCUDAToolkit_INCLUDE_DIRECTORIES="$PREFIX/include;$PREFIX/${targetsDir}/include" \ | ||
  -DNVSHMEM_MPI_SUPPORT=0 \ | ||
  -DNVSHMEM_PREFIX="$PREFIX" \ | ||
  -DCUDA_HOME="$PREFIX" \ | ||
  -DCMAKE_CUDA_ARCHITECTURES="${SM}" | ||
|
||
cmake --build . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
c_stdlib_version: # [linux] | ||
- 2.28 # [linux] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
{% set version = "3.1.7" %} | ||
{% set platform = "linux-x86_64" %} # [linux64] | ||
{% set platform = "linux-sbsa" %} # [aarch64] | ||
{% set extension = "tar.xz" %} | ||
|
||
{% set soname = version.split(".")[0] %} | ||
{% set cuda_major = environ.get("cuda_compiler_version", "12.6").split(".")[0] %} | ||
|
||
{% set sha256 = "4e03e40d16770a5bdbcefeb7f18579bdfc07aaa7b09a4825a7ab9b5122f69567" %} # [linux64 and (cuda_compiler_version or "").startswith("11")] | ||
{% set sha256 = "128c62dd19926f58d1ff69290917dd50f3fb1690567f52d3965cd8fbfb9b2412" %} # [linux64 and (cuda_compiler_version or "").startswith("12")] | ||
{% set sha256 = "03e4404e888609bc98f496453896acc34c9ca7bbc39cc32b41d677bcaab71c49" %} # [aarch64 and (cuda_compiler_version or "").startswith("12")] | ||
|
||
package: | ||
name: libnvshmem-split | ||
version: {{ version }} | ||
|
||
source: | ||
url: https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/{{ platform }}/libnvshmem-{{ platform }}-{{ version }}_cuda{{ cuda_major }}-archive.{{ extension }} | ||
sha256: {{ sha256 }} | ||
|
||
build: | ||
number: 0 | ||
skip: true # [not (linux64 or aarch64)] | ||
skip: true # [cuda_compiler_version in (None, "None") or (aarch64 and (cuda_compiler_version or "").startswith("11"))] | ||
|
||
requirements: | ||
build: | ||
- cf-nvidia-tools 1 # [linux] | ||
|
||
outputs: | ||
- name: libnvshmem{{ soname }} | ||
build: | ||
ignore_run_exports_from: # [(cuda_compiler_version or "").startswith("12")] | ||
- {{ compiler('cuda') }} # [(cuda_compiler_version or "").startswith("12")] | ||
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")] | ||
missing_dso_whitelist: | ||
- "*libcuda.so*" # driver | ||
- "*openmpi*" # omitted as dependency - customer HPC systems already have their own versions of MPI installed | ||
- "*libfabric*" # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin | ||
- "*libfabric1*" # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin | ||
- "*libmlx5*" # omitted as dependency - needed for ibgda and ibdevx transport, installed as part of the Mellanox OFED | ||
- "*libmpi*" # omitted as dependency - part of openmpi | ||
- "*libpmix*" # omitted as dependency - part of openmpi | ||
- "*liboshmem*" # omitted as dependency - part of openmpi | ||
- "*rdma-core*" # omitted as dependency - MOFED replaces rdma-core | ||
files: | ||
- lib/*nvshmem*.so.* | ||
requirements: | ||
build: | ||
- {{ compiler('c') }} | ||
- {{ compiler('cxx') }} | ||
- {{ compiler('cuda') }} | ||
- {{ stdlib("c") }} | ||
- arm-variant * {{ arm_variant_type }} # [aarch64] | ||
host: | ||
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] | ||
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")] | ||
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")] | ||
- ucx | ||
run: | ||
- {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }} | ||
- cuda-cudart # [(cuda_compiler_version or "").startswith("12")] | ||
- libpciaccess | ||
- libpmix | ||
- nccl | ||
- pmix-bin | ||
- ucx | ||
run_constrained: | ||
- arm-variant * {{ arm_variant_type }} # [aarch64] | ||
test: | ||
commands: | ||
- test -L $PREFIX/lib/libnvshmem_host.so.{{ soname }} | ||
- test -f $PREFIX/lib/libnvshmem_host.so.{{ version }} | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}.0.0 | ||
- test -L $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }} | ||
- test -f $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}.0.0 | ||
|
||
- name: libnvshmem-dev | ||
build: | ||
ignore_run_exports_from: # [(cuda_compiler_version or "").startswith("12")] | ||
- {{ compiler('cuda') }} # [(cuda_compiler_version or "").startswith("12")] | ||
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")] | ||
missing_dso_whitelist: | ||
- "*libcuda.so*" # driver | ||
- "*libnvshmem_host.so.{{ soname }}" # avoids: ERROR (libnvshmem-dev,bin/perftest/device/pt-to-pt/shmem_put_bw): lib/libnvshmem_host.so.3 not found in any packages. We DO test for its existence though. | ||
- "*libmpi*" | ||
- "*libpmix*" | ||
- "*liboshmem*" | ||
run_exports: | ||
- {{ pin_subpackage("libnvshmem" ~ soname, max_pin=None) }} | ||
files: | ||
- bin/ | ||
- include/nvshmem*.h | ||
- include/bootstrap_device_host | ||
- include/device | ||
- include/device_host | ||
- include/device_host_transport | ||
- include/host | ||
- include/non_abi | ||
- lib/cmake | ||
- lib/*nvshmem*.so | ||
- share/src | ||
requirements: | ||
build: | ||
- {{ compiler("c") }} | ||
- {{ compiler('cuda') }} | ||
- {{ compiler("cxx") }} | ||
- {{ stdlib("c") }} | ||
- arm-variant * {{ arm_variant_type }} # [aarch64] | ||
host: | ||
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] | ||
- cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")] | ||
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")] | ||
- libpciaccess | ||
run: | ||
- {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }} | ||
- {{ pin_subpackage("libnvshmem" ~ soname, exact=True) }} | ||
- cudatoolkit # [(cuda_compiler_version or "").startswith("11")] | ||
- cuda-cudart # [(cuda_compiler_version or "").startswith("12")] | ||
run_constrained: | ||
- arm-variant * {{ arm_variant_type }} # [aarch64] | ||
test: | ||
commands: | ||
- test -f $PREFIX/bin/hydra_pmi_proxy | ||
- test -f $PREFIX/bin/nvshmem-info | ||
- test -f $PREFIX/bin/nvshmrun.hydra | ||
- test -f $PREFIX/bin/hydra_nameserver | ||
- test -f $PREFIX/bin/nvshmrun | ||
- test -f $PREFIX/bin/hydra_persist | ||
- test -f $PREFIX/bin/examples/collective-launch | ||
- test -f $PREFIX/bin/examples/on-stream | ||
- test -f $PREFIX/bin/perftest/device/pt-to-pt/shmem_p_latency | ||
- test -f $PREFIX/bin/perftest/device/coll/reduction_latency | ||
- test -f $PREFIX/bin/perftest/host/init/malloc | ||
- test -f $PREFIX/bin/perftest/host/pt-to-pt/bw | ||
- test -f $PREFIX/bin/perftest/host/coll/broadcast_on_stream | ||
- test -f $PREFIX/include/bootstrap_device_host/nvshmem_uniqueid.h | ||
- test -f $PREFIX/include/device/nvshmem_defines.h | ||
- test -f $PREFIX/include/device_host/nvshmem_types.h | ||
- test -f $PREFIX/include/host/nvshmem_api.h | ||
- test -f $PREFIX/include/non_abi/device/coll/defines.cuh | ||
- test -f $PREFIX/include/device_host_transport/nvshmem_constants.h | ||
- test -f $PREFIX/include/nvshmem.h | ||
- test -f $PREFIX/include/nvshmemx.h | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMConfig.cmake | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets-release.cmake | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets.cmake | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets-release.cmake | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets.cmake | ||
- test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMVersion.cmake | ||
- test -L $PREFIX/lib/libnvshmem_host.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so | ||
- test -L $PREFIX/lib/nvshmem_bootstrap_uid.so | ||
- test -L $PREFIX/lib/nvshmem_transport_ibdevx.so | ||
- test -L $PREFIX/lib/nvshmem_transport_ibgda.so | ||
- test -L $PREFIX/lib/nvshmem_transport_ibrc.so | ||
- test -L $PREFIX/lib/nvshmem_transport_libfabric.so | ||
- test -L $PREFIX/lib/nvshmem_transport_ucx.so | ||
|
||
- name: libnvshmem-static | ||
files: | ||
- lib/libnvshmem*.a | ||
requirements: | ||
run: | ||
- {{ pin_subpackage("libnvshmem-dev", exact=True) }} | ||
run_constrained: | ||
- arm-variant * {{ arm_variant_type }} # [aarch64] | ||
test: | ||
skip: true # [not linux64] | ||
requires: | ||
- {{ compiler('c') }} | ||
- {{ compiler('cxx') }} | ||
- {{ stdlib("c") }} | ||
- cmake | ||
- make | ||
- cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] | ||
- {{ compiler('cuda') }} | ||
- openmpi | ||
files: | ||
- compile_perf_test.sh | ||
commands: | ||
- test -f $PREFIX/lib/libnvshmem_device.a | ||
- test -f $PREFIX/lib/libnvshmem.a | ||
- bash compile_perf_test.sh | ||
|
||
about: | ||
home: https://docs.nvidia.com/nvshmem/index.html | ||
license: LicenseRef-NVIDIA-End-User-License-Agreement | ||
license_file: LICENSE | ||
license_url: https://docs.nvidia.com/nvshmem/api/sla.html | ||
summary: NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs. | ||
description: | | ||
NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs. | ||
NVSHMEM can significantly reduce communication and coordination overheads by allowing programmers to perform these operations from within CUDA kernels and on CUDA streams. | ||
doc_url: https://docs.nvidia.com/nvshmem/api/index.html | ||
|
||
extra: | ||
feedstock-name: libnvshmem | ||
recipe-maintainers: | ||
- conda-forge/cuda |