conda-forge · carterbox · Feb 13, 2025 · Dec 14, 2024 · Jan 14, 2025 · Jan 15, 2025
diff --git a/recipes/libnvshmem/build-libnvshmem.sh b/recipes/libnvshmem/build-libnvshmem.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Install script for libnvshmem-dev: copies bin, include, CMake files, unversioned .so names and share into $PREFIX.
+set -e
+# Run check-glibc from the build environment over the shipped files; under set -e a failure aborts the script.
+"$BUILD_PREFIX/bin/check-glibc" bin/* lib/* bin/examples/* bin/perftest/device/coll/* bin/perftest/device/pt-to-pt/* bin/perftest/host/coll/* bin/perftest/host/init/* bin/perftest/host/pt-to-pt/*
+
+mkdir -p "$PREFIX/lib/cmake/nvshmem/"
+
+cp -rv bin "$PREFIX/"
+cp -rv include/ "$PREFIX/"
+cp -rv lib/cmake/ "$PREFIX/lib/"
+cp -rv lib/libnvshmem_host.so "$PREFIX/lib"
+cp -rv lib/nvshmem_bootstrap*.so "$PREFIX/lib"
+cp -rv lib/nvshmem_transport*.so "$PREFIX/lib"
+cp -rv share/ "$PREFIX/"
+
diff --git a/recipes/libnvshmem/compile_perf_test.sh b/recipes/libnvshmem/compile_perf_test.sh
@@ -0,0 +1,37 @@
+
+#!/bin/bash
+
+set -ex
+
+#GPU Arch - anything recent should do but change accordingly if build breaks
+SM=89
+
+[[ ${target_platform} == "linux-64" ]] && targetsDir="targets/x86_64-linux"
+[[ ${target_platform} == "linux-aarch64" ]] && targetsDir="targets/sbsa-linux"
+
+# E.g. $CONDA_PREFIX/libexec/gcc/x86_64-conda-linux-gnu/13.3.0/cc1plus
+find $CONDA_PREFIX -name cc1plus
+
+GCC_DIR=$(dirname $(find $CONDA_PREFIX -name cc1plus))
+
+export PATH=${GCC_DIR}:$PATH
+export LD_LIBRARY_PATH=${GCC_DIR}:$LD_LIBRARY_PATH
+
+# No need for use-linker-plugin optimization, causes compile failure, don't use it for the test
+export CXXFLAGS="${CXXFLAGS} -fno-use-linker-plugin"
+
+echo CC =  $CC
+echo CXX =  $CXX
+
+cmake -S $PREFIX/share/src/perftest \
+  -DCMAKE_LIBRARY_PATH=${GCC_DIR} \
+  -DCMAKE_C_COMPILER=$CC \
+  -DCMAKE_CUDA_COMPILER=$PREFIX/bin/nvcc \
+  -DCMAKE_CXX_COMPILER=$CXX \
+  -DCUDAToolkit_INCLUDE_DIRECTORIES="$PREFIX/include;$PREFIX/${targetsDir}/include" \
+  -DNVSHMEM_MPI_SUPPORT=0 \
+  -DNVSHMEM_PREFIX=$PREFIX \
+  -DCUDA_HOME=$PREFIX \
+  -DCMAKE_CUDA_ARCHITECTURES="${SM}"
+
+cmake --build .
diff --git a/recipes/libnvshmem/conda_build_config.yaml b/recipes/libnvshmem/conda_build_config.yaml
@@ -0,0 +1,2 @@
+c_stdlib_version:  # [linux]
+  - "2.28"  # [linux]
diff --git a/recipes/libnvshmem/meta.yaml b/recipes/libnvshmem/meta.yaml
@@ -0,0 +1,225 @@
+{% set version = "3.1.7" %}
+{% set platform = "linux-x86_64" %}  # [linux64]
+{% set platform = "linux-sbsa" %}    # [aarch64]
+{% set extension = "tar.xz" %}
+# soname is the first dot-separated field of version; cuda_major is the first field of cuda_compiler_version, with 12.6 as fallback.
+{% set soname = version.split(".")[0] %}
+{% set cuda_major = environ.get("cuda_compiler_version", "12.6").split(".")[0] %}
+
+package:
+  name: libnvshmem-split
+  version: {{ version }}
+
+source:
+  url: https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/{{ platform }}/libnvshmem-{{ platform }}-{{ version }}_cuda{{ cuda_major }}-archive.{{ extension }}
+  sha256: 4e03e40d16770a5bdbcefeb7f18579bdfc07aaa7b09a4825a7ab9b5122f69567  # [linux64 and (cuda_compiler_version or "").startswith("11")]
+  sha256: 128c62dd19926f58d1ff69290917dd50f3fb1690567f52d3965cd8fbfb9b2412  # [linux64 and (cuda_compiler_version or "").startswith("12")]
+  sha256: 03e4404e888609bc98f496453896acc34c9ca7bbc39cc32b41d677bcaab71c49  # [aarch64 and (cuda_compiler_version or "").startswith("12")]
+
+build:
+  number: 0
+  # Skipped unless Linux x86_64/aarch64 with a CUDA compiler; aarch64 with CUDA 11 is skipped too, as no sha256 is listed for it.
+  skip: true  # [not (linux64 or aarch64) or cuda_compiler_version in (None, "None") or (aarch64 and (cuda_compiler_version or "").startswith("11"))]
+
+outputs:
+  - name: libnvshmem{{ soname }}
+    build:
+      ignore_run_exports_from:    # [(cuda_compiler_version or "").startswith("12")]
+        - {{ compiler('cuda') }}  # [(cuda_compiler_version or "").startswith("12")]
+        - cuda-cudart-dev         # [(cuda_compiler_version or "").startswith("12")]
+      missing_dso_whitelist:  # linked libraries that are deliberately not resolved to conda packages; reasons are given per entry
+        - "*libcuda.so*" # driver
+        - "*openmpi*"    # omitted as dependency - customer HPC systems have own versions of MPI installed already
+        - "*libfabric*"  # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin
+        - "*libfabric1*" # omitted as dependency - used for both Slingshot and EFA NICs. They have a custom libfabric installation and plugin
+        - "*libmlx5*"    # omitted as dependency - needed for ibgda and ibdevx transport, installed as part of the Mellanox OFED
+        - "*libmpi*"     # omitted as dependency - part of openmpi
+        - "*libpmix*"    # omitted as dependency - part of openmpi
+        - "*liboshmem*"  # omitted as dependency - part of openmpi
+        - "*rdma-core*"  # omitted as dependency - MOFED replaces rdma-core
+      script: |
+        cp -rv lib $PREFIX/
+    files:  # the script copies all of lib/, but only the versioned .so.* names below are packaged in this output
+      - lib/libnvshmem_host.so.*
+      - lib/nvshmem_bootstrap*.so.*
+      - lib/nvshmem_transport*.so.*
+    requirements:
+      build:
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        - {{ compiler('cuda') }}
+        - {{ stdlib("c") }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+      host:
+        - cuda-version {{ cuda_compiler_version }}  # [cuda_compiler_version != "None"]
+        - cuda-cudart-dev   # [(cuda_compiler_version or "").startswith("12")]
+        - cudatoolkit       # [(cuda_compiler_version or "").startswith("11")]
+        - ucx
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - cuda-cudart       # [(cuda_compiler_version or "").startswith("12")]
+        - libpciaccess
+        - libpmix
+        - nccl
+        - pmix-bin
+        - ucx
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      commands:  # per library: -L requires the major-version name to be a symlink, -f requires the fully versioned name to resolve to a regular file
+        - test -L $PREFIX/lib/libnvshmem_host.so.{{ soname }}
+        - test -f $PREFIX/lib/libnvshmem_host.so.{{ version }}
+        - test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_mpi.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmi.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmi2.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_pmix.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_shmem.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_bootstrap_uid.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibdevx.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibgda.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ibrc.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_libfabric.so.{{ soname }}.0.0
+        - test -L $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}
+        - test -f $PREFIX/lib/nvshmem_transport_ucx.so.{{ soname }}.0.0
+
+  - name: libnvshmem-dev
+    build:
+      ignore_run_exports_from:    # [(cuda_compiler_version or "").startswith("12")]
+        - {{ compiler('cuda') }}  # [(cuda_compiler_version or "").startswith("12")]
+        - cuda-cudart-dev         # [(cuda_compiler_version or "").startswith("12")]
+      missing_dso_whitelist:
+        - "*libcuda.so*" # driver
+        - "*libnvshmem_host.so.{{ soname }}" # avoids: ERROR (libnvshmem-dev,bin/perftest/device/pt-to-pt/shmem_put_bw): lib/libnvshmem_host.so.3 not found in any packages. We DO test for its existence though.
+        - "*libmpi*"
+        - "*libpmix*"
+        - "*liboshmem*"
+      run_exports:  # packages built against this output get a run dependency on the versioned library output, with no upper bound (max_pin=None)
+        - {{ pin_subpackage("libnvshmem" ~ soname, max_pin=None) }}
+    script: build-libnvshmem.sh  # copies bin, include, lib/cmake, the unversioned .so names and share into $PREFIX
+    script_interpreter: /bin/bash
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ compiler('cuda') }}
+        - {{ compiler("cxx") }}
+        - {{ stdlib("c") }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+        - cf-nvidia-tools 1  # [linux]
+      host:
+        - cuda-version {{ cuda_compiler_version }}  # [cuda_compiler_version != "None"]
+        - cuda-cudart-dev   # [(cuda_compiler_version or "").startswith("12")]
+        - cudatoolkit       # [(cuda_compiler_version or "").startswith("11")]
+        - libpciaccess
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - {{ pin_subpackage("libnvshmem" ~ soname, exact=True) }}
+        - cudatoolkit       # [(cuda_compiler_version or "").startswith("11")]
+        - cuda-cudart       # [(cuda_compiler_version or "").startswith("12")]
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      commands:  # spot-checks: launcher and info tools, example and perftest binaries, headers, CMake files, unversioned .so symlinks
+        - test -f $PREFIX/bin/hydra_pmi_proxy
+        - test -f $PREFIX/bin/nvshmem-info
+        - test -f $PREFIX/bin/nvshmrun.hydra
+        - test -f $PREFIX/bin/hydra_nameserver
+        - test -f $PREFIX/bin/nvshmrun
+        - test -f $PREFIX/bin/hydra_persist
+        - test -f $PREFIX/bin/examples/collective-launch
+        - test -f $PREFIX/bin/examples/on-stream
+        - test -f $PREFIX/bin/perftest/device/pt-to-pt/shmem_p_latency
+        - test -f $PREFIX/bin/perftest/device/coll/reduction_latency
+        - test -f $PREFIX/bin/perftest/host/init/malloc
+        - test -f $PREFIX/bin/perftest/host/pt-to-pt/bw
+        - test -f $PREFIX/bin/perftest/host/coll/broadcast_on_stream
+        - test -f $PREFIX/include/bootstrap_device_host/nvshmem_uniqueid.h
+        - test -f $PREFIX/include/device/nvshmem_defines.h
+        - test -f $PREFIX/include/device_host/nvshmem_types.h
+        - test -f $PREFIX/include/host/nvshmem_api.h
+        - test -f $PREFIX/include/non_abi/device/coll/defines.cuh
+        - test -f $PREFIX/include/device_host_transport/nvshmem_constants.h
+        - test -f $PREFIX/include/nvshmem.h
+        - test -f $PREFIX/include/nvshmemx.h
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMConfig.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets-release.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMDeviceTargets.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets-release.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMTargets.cmake
+        - test -f $PREFIX/lib/cmake/nvshmem/NVSHMEMVersion.cmake
+        - test -L $PREFIX/lib/libnvshmem_host.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_mpi.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmi2.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_pmix.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_shmem.so
+        - test -L $PREFIX/lib/nvshmem_bootstrap_uid.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibdevx.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibgda.so
+        - test -L $PREFIX/lib/nvshmem_transport_ibrc.so
+        - test -L $PREFIX/lib/nvshmem_transport_libfabric.so
+        - test -L $PREFIX/lib/nvshmem_transport_ucx.so
+
+  - name: libnvshmem-static
+    build:
+      script: |
+        mkdir -p $PREFIX/lib && cp -rv lib/libnvshmem*.a $PREFIX/lib/
+    files:  # only the static archives are packaged in this output
+      - lib/libnvshmem*.a
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ compiler("cxx") }}
+        - {{ stdlib("c") }}
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+      host:
+        - cuda-version {{ cuda_compiler_version }}  # [cuda_compiler_version != "None"]
+      run:
+        - {{ pin_compatible("cuda-version", min_pin="x", max_pin="x") }}
+        - {{ pin_subpackage("libnvshmem-dev", exact=True) }}
+        - nccl
+        - ucx
+      run_constrained:
+        - arm-variant * {{ arm_variant_type }}  # [aarch64]
+    test:
+      skip: true  # [not linux64]
+      requires:
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        - {{ stdlib("c") }}
+        - cmake
+        - make
+        - cuda-version {{ cuda_compiler_version }}                     # [cuda_compiler_version != "None"]
+        - {{ compiler('cuda') }}
+        - openmpi
+      files:
+        - compile_perf_test.sh
+      commands:  # check both archives exist, then configure and build the perftest sources with compile_perf_test.sh
+        - test -f $PREFIX/lib/libnvshmem_device.a
+        - test -f $PREFIX/lib/libnvshmem.a
+        - bash compile_perf_test.sh
+
+about:
+  home: https://docs.nvidia.com/nvshmem/index.html
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  license_file: LICENSE
+  license_url: https://docs.nvidia.com/nvshmem/api/sla.html
+  summary: NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
+  description: |
+    NVIDIA NVSHMEM is an NVIDIA based "shared memory" library that provides an easy-to-use CPU-side interface to allocate pinned memory that is symmetrically distributed across a cluster of NVIDIA GPUs.
+    NVSHMEM can significantly reduce communication and coordination overheads by allowing programmers to perform these operations from within CUDA kernels and on CUDA streams.
+  doc_url: https://docs.nvidia.com/nvshmem/api/index.html
+
+extra:
+  feedstock-name: libnvshmem  # differs from the top-level package name, libnvshmem-split
+  recipe-maintainers:
+    - conda-forge/cuda