Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/kvbench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ These arguments are used by both `plan` and `profile` commands:
| -------- | ----------- |
| `--source` | Source of the nixl descriptors [file, memory, gpu] (default: file) |
| `--destination` | Destination of the nixl descriptors [file, memory, gpu] (default: memory) |
| `--backend` | Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX) |
| `--backend` | Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, LIBFABRIC] (default: UCX) |
| `--worker_type` | Worker to use to transfer data [nixl, nvshmem] (default: nixl) |
| `--initiator_seg_type` | Memory segment type for initiator [DRAM, VRAM] (default: DRAM) |
| `--target_seg_type` | Memory segment type for target [DRAM, VRAM] (default: DRAM) |
Expand Down
2 changes: 1 addition & 1 deletion benchmark/kvbench/commands/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def nixl_bench_args(func):
func = click.option(
"--backend",
type=str,
help="Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX)",
help="Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, LIBFABRIC] (default: UCX)",
)(func)
func = click.option(
"--worker_type",
Expand Down
15 changes: 14 additions & 1 deletion benchmark/kvbench/test/custom_traffic_perftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def __init__(
self.nixl_agent = nixl_agent
if mem_type in ("cuda", "vram"):
device = torch.device("cuda")
elif mem_type == "hpu":
device = torch.device("hpu")
elif mem_type in ("cpu", "dram"):
device = torch.device("cpu")
else:
Expand Down Expand Up @@ -95,6 +97,8 @@ def destroy(self):
if hasattr(self.buf, "is_cuda") and self.buf.is_cuda:
del self.buf
torch.cuda.empty_cache()
elif hasattr(self.buf, "is_hpu") and self.buf.is_hpu:
del self.buf


class CTPerftest:
Expand Down Expand Up @@ -122,6 +126,15 @@ def __init__(
logger.warning(
"Cuda buffers detected, but the env var CUDA_VISIBLE_DEVICES is not set, this will cause every process in the same host to use the same GPU device."
)

if (
not os.environ.get("HABANA_VISIBLE_MODULES")
and self.traffic_pattern.mem_type == "hpu"
):
logger.warning(
"HPU buffers detected, but the env var HABANA_VISIBLE_MODULES is not set, this will cause every process in the same host to use the same HPU device."
)


"""Initialize the buffers, one big send and recv buffer is used for all the transfers
it has to be chunked inside each transfer to get buffers per ranks
Expand Down Expand Up @@ -250,7 +263,7 @@ def _warmup(
self,
iters=15,
fill_value: int = 100000,
mem_type: Literal["cuda", "vram", "cpu", "dram"] = "cuda",
mem_type: Literal["cuda", "vram", "cpu", "dram", "hpu"] = "cuda",
):
full_matrix = np.full((self.world_size, self.world_size), fill_value=fill_value)
tp = TrafficPattern(matrix=full_matrix, mem_type=mem_type)
Expand Down
9 changes: 9 additions & 0 deletions benchmark/kvbench/test/sequential_custom_traffic_perftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ def __init__(
logger.warning(
"Cuda buffers detected, but the env var CUDA_VISIBLE_DEVICES is not set, this will cause every process in the same host to use the same GPU device."
)

if (
not os.environ.get("HABANA_VISIBLE_MODULES")
and self.traffic_pattern.mem_type == "hpu"
):
logger.warning(
"HPU buffers detected, but the env var HABANA_VISIBLE_MODULES is not set, this will cause every process in the same host to use the same HPU device."
)

assert "UCX" in self.nixl_agent.get_plugin_list(), "UCX plugin is not loaded"

# NixlBuffer caches buffers and reuse them if they are big enough, let's initialize them once, with the largest needed size
Expand Down
2 changes: 1 addition & 1 deletion benchmark/kvbench/test/traffic_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class TrafficPattern:
"""

matrix: np.ndarray
mem_type: Literal["cuda", "vram", "cpu", "dram"]
mem_type: Literal["cuda", "vram", "cpu", "dram", "hpu"]
xfer_op: Literal["WRITE", "READ"] = "WRITE"
shards: int = 1
dtype: torch.dtype = torch.int8
Expand Down
120 changes: 107 additions & 13 deletions benchmark/nixlbench/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,87 @@ if cuda_available
endif
endif

# SynapseAI (Habana Gaudi) dependency detection
synapse_inc_path = get_option('synapsepath_inc')
synapse_lib_path = get_option('synapsepath_lib')

if synapse_lib_path == ''
#use default path
# Try to find both libSynapse and hl-thunk libraries
synapse_lib = cpp.find_library('Synapse',
dirs: ['/usr/lib/habanalabs', '/usr/local/lib/habanalabs'],
required: false)
hlthunk_lib = cpp.find_library('hl-thunk',
dirs: ['/usr/lib/habanalabs', '/usr/local/lib/habanalabs'],
required: false)
else
synapse_lib = cpp.find_library('Synapse',
dirs: [synapse_lib_path],
required: false)
hlthunk_lib = cpp.find_library('hl-thunk',
dirs: [synapse_lib_path],
required: false)
endif

if synapse_inc_path == ''
#use default path
synapse_inc_path = '/usr/include/habanalabs/'
endif

# SynapseAI support requires both libraries
synapseai_dep = dependency('', required: false) # Initialize as not found
if synapse_lib.found() and hlthunk_lib.found()
synapseai_dep = declare_dependency(dependencies: [synapse_lib, hlthunk_lib])
elif hlthunk_lib.found()
# Fallback to just hl-thunk if libSynapse not available
synapseai_dep = hlthunk_lib
endif

if synapseai_dep.found()
# Create proper dependency with include paths (including DRM path for habanalabs headers)
synapseai_dep = declare_dependency(
dependencies: synapseai_dep,
include_directories: [
include_directories('/usr/include/drm'),
include_directories(synapse_inc_path)
]
)
message('Found SynapseAI support for Habana Gaudi devices')
synapseai_available = true
else
synapseai_available = false
warning('SynapseAI not found. Habana Gaudi device support will be disabled.')
endif

# GFlags
gflags_dep = dependency('gflags', required: true)

# OpenMP
openmp_dep = dependency('openmp', required: true)

# Check for etcd-cpp-api - use multiple methods for discovery
# Try pkg-config first
etcd_dep = dependency('etcd-cpp-api', required : false)
if not etcd_dep.found()
# Fallback: manual configuration
# message('etcd-cpp-api not found via pkg-config, using manual configuration')

# Ensure etcd is available
# Check if we have the library files
etcd_lib = meson.get_compiler('cpp').find_library('etcd-cpp-api',
dirs: ['/usr/local/lib'],
required: false)

if etcd_lib.found()
etcd_dep = declare_dependency(
include_directories: include_directories('/usr/local/include'),
dependencies: [etcd_lib],
# Add any required dependencies for etcd-cpp-api
link_args: [] # Add any additional link args if needed
)
message('etcd-cpp-api found manually in /usr/local/lib')
else
etcd_dep = disabler()
message('etcd-cpp-api not found anywhere')
endif
endif
etcd_available = etcd_dep.found()
if etcd_available
add_project_arguments('-DHAVE_ETCD', language: 'cpp')
Expand Down Expand Up @@ -148,7 +219,14 @@ if cuda_fabric_available
add_project_arguments('-DHAVE_CUDA_FABRIC', language: 'cpp')
endif

if synapseai_available
add_project_arguments('-DHAVE_SYNAPSEAI', language: 'cpp')
endif

# Subprojects
if synapseai_available
subdir('src/synapseai')
endif
subdir('src/utils')
subdir('src/runtime')
subdir('src/worker')
Expand All @@ -161,6 +239,7 @@ configure_file(
'HAVE_NVSHMEM': nvshmem_available ? '1' : '0',
'HAVE_CUDA': cuda_available ? '1' : '0',
'HAVE_CUDA_FABRIC': cuda_fabric_available ? '1' : '0',
'HAVE_SYNAPSEAI': synapseai_available ? '1' : '0',
},
install: true,
install_dir: get_option('includedir') / 'nixlbench'
Expand All @@ -174,6 +253,11 @@ endif
if cuda_available
deps += [cuda_dep]
endif

if synapseai_available
deps += [synapseai_dep]
message('add synapseai_dep')
endif
if nvshmem_available
deps += [nvshmem_lib]
args += [
Expand All @@ -185,9 +269,9 @@ if nvshmem_available
]
endif

if not etcd_available
error('No runtime available or not found')
endif
#if not etcd_available
# error('No runtime available or not found')
#endif

if nvshmem_available
# Use nvcc directly for compilation and linking
Expand Down Expand Up @@ -240,11 +324,21 @@ if nvshmem_available
install_dir: get_option('bindir'),
depends: [nixlbench_runtimes, utils_lib, worker_libs])
else
executable('nixlbench', 'src/main.cpp',
include_directories: inc_dir,
link_with: [nixlbench_runtimes, utils_lib, worker_libs],
dependencies: deps,
link_args: args,
install: true,
install_dir: get_option('bindir'))
if synapseai_available
executable('nixlbench', 'src/main.cpp',
include_directories: inc_dir,
link_with: [nixlbench_runtimes, utils_lib, worker_libs, synapseaiutils_lib],
dependencies: deps,
link_args: args,
install: true,
install_dir: get_option('bindir'))
else
executable('nixlbench', 'src/main.cpp',
include_directories: inc_dir,
link_with: [nixlbench_runtimes, utils_lib, worker_libs],
dependencies: deps,
link_args: args,
install: true,
install_dir: get_option('bindir'))
endif
endif
2 changes: 2 additions & 0 deletions benchmark/nixlbench/meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ option('etcd_lib_path', type: 'string', value: '', description: 'Path to ETCD C+
option('nixl_path', type: 'string', value: '/usr/local', description: 'Path to NiXL')
option('nvshmem_inc_path', type: 'string', value: '', description: 'Path to NVSHMEM include directory')
option('nvshmem_lib_path', type: 'string', value: '', description: 'Path to NVSHMEM library directory')
option('synapsepath_inc', type: 'string', value: '', description: 'Include path for Intel Gaudi/ HPU')
option('synapsepath_lib', type: 'string', value: '', description: 'Library path for Intel Gaudi/ HPU')
34 changes: 34 additions & 0 deletions benchmark/nixlbench/src/synapseai/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helper library wrapping SynapseAI (Intel Gaudi / HPU) support for nixlbench.
# NOTE(review): this subdir is only entered when synapseai_available is true,
# so `synapseai_dep` and `inc_dir` are assumed to be defined by the parent
# meson.build — confirm against benchmark/nixlbench/meson.build.
synapseaiutils_sources = [
'synapse_utils.cpp',
'synapse_utils.h',
]

# Propagates the SynapseAI libraries (libSynapse / hl-thunk) and include paths
# detected by the top-level build.
synapseaiutils_deps = [
synapseai_dep
]

# Static helper library; the top-level build links it into the nixlbench
# executable via link_with when SynapseAI is available.
synapseaiutils_lib = static_library('synapseaiutils',
synapseaiutils_sources,
dependencies: synapseaiutils_deps,
include_directories: inc_dir
)
# Convenience dependency so other targets can consume the helpers (library,
# transitive deps, and headers) as a single unit.
synapseaiutils_dep = declare_dependency(
link_with: synapseaiutils_lib,
dependencies: synapseaiutils_deps,
include_directories: inc_dir
)
Loading