diff --git a/src/plugins/libfabric/libfabric_backend.cpp b/src/plugins/libfabric/libfabric_backend.cpp
index 5813e88656..c15b3b44f7 100644
--- a/src/plugins/libfabric/libfabric_backend.cpp
+++ b/src/plugins/libfabric/libfabric_backend.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@
 #ifdef HAVE_CUDA
 static int
-cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx) {
+cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx, std::string &pci_bus_id) {
     CUmemorytype mem_type = CU_MEMORYTYPE_HOST;
     uint32_t is_managed = 0;
     CUpointer_attribute attr_type[4];
@@ -75,6 +75,19 @@ cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx) {
     result = cuPointerGetAttributes(4, attr_type, attr_data, (CUdeviceptr)address);
     is_dev = (mem_type == CU_MEMORYTYPE_DEVICE);
 
+    // Get PCI bus ID if device memory
+    if (result == CUDA_SUCCESS && is_dev) {
+        char pci_buf[32];
+        CUresult pci_result = cuDeviceGetPCIBusId(pci_buf, sizeof(pci_buf), dev);
+        if (pci_result == CUDA_SUCCESS) {
+            pci_bus_id = std::string(pci_buf);
+        } else {
+            pci_bus_id = "";
+        }
+    } else {
+        pci_bus_id = "";
+    }
+
     return (CUDA_SUCCESS != result);
 }
 
@@ -89,6 +102,7 @@ nixlLibfabricCudaCtx::cudaUpdateCtxPtr(void *address, int expected_dev, bool &wa
     bool is_dev;
     CUdevice dev;
     CUcontext ctx;
+    std::string pci_bus_id; // Not used here, but required by cudaQueryAddr
    int ret;
 
     was_updated = false;
@@ -96,7 +110,7 @@ nixlLibfabricCudaCtx::cudaUpdateCtxPtr(void *address, int expected_dev, bool &wa
     if (expected_dev == -1) return -1;
     if (myDevId_ != -1 && expected_dev != myDevId_) return -1;
 
-    ret = cudaQueryAddr(address, is_dev, dev, ctx);
+    ret = cudaQueryAddr(address, is_dev, dev, ctx, pci_bus_id);
     if (ret) return ret;
     if (!is_dev) return 0;
     if (dev != expected_dev) return -1;
@@ -734,6 +748,7 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
     priv->length_ = mem.len;
     priv->gpu_device_id_ = mem.devId; // Store GPU device ID
 
+    std::string pci_bus_id = "";
 #ifdef HAVE_CUDA
     // Handle CUDA memory registration with GPU Direct RDMA support
     if (nixl_mem == VRAM_SEG) {
@@ -760,6 +775,19 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
             }
             NIXL_DEBUG << "Set CUDA device context to GPU " << mem.devId;
         }
+
+        // Query PCI bus ID from memory address (AFTER setting context)
+        bool is_dev;
+        CUdevice dev;
+        CUcontext ctx;
+
+        int ret = cudaQueryAddr((void *)mem.addr, is_dev, dev, ctx, pci_bus_id);
+        if (ret || !is_dev) {
+            NIXL_ERROR << "Failed to query device from memory " << (void *)mem.addr;
+            return NIXL_ERR_BACKEND;
+        }
+
+        NIXL_DEBUG << "Queried PCI bus ID: " << pci_bus_id << " for GPU " << mem.devId;
     }
 #endif
 
@@ -777,12 +805,14 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
     // Use Rail Manager for centralized memory registration with GPU Direct RDMA support
     NIXL_TRACE << "Registering memory: addr=" << (void *)mem.addr << " len=" << mem.len
-               << " mem_type=" << nixl_mem << " devId=" << mem.devId;
+               << " mem_type=" << nixl_mem << " devId=" << mem.devId
+               << (nixl_mem == VRAM_SEG ? " pci_bus_id=" + pci_bus_id : "");
 
     nixl_status_t status = rail_manager.registerMemory((void *)mem.addr,
                                                        mem.len,
                                                        nixl_mem,
                                                        mem.devId,
+                                                       pci_bus_id,
                                                        priv->rail_mr_list_,
                                                        priv->rail_key_list_,
                                                        priv->selected_rails_);
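For reviewers who want to exercise the new out-parameter contract in isolation: the sketch below is hypothetical and not part of the patch; it allocates device memory through the CUDA driver API and retrieves the same PCI bus ID string that cudaQueryAddr now reports. The printed format, e.g. "0000:59:00.0" with a zero-padded domain, is what the normalization in libfabric_topology.cpp expects to receive.

    // Hypothetical standalone probe (not in the patch); error handling abbreviated.
    #include <cuda.h>
    #include <cstdio>

    int main() {
        CUdevice dev;
        CUcontext ctx;
        CUdeviceptr ptr;
        char pci_buf[32];

        if (cuInit(0) != CUDA_SUCCESS || cuDeviceGet(&dev, 0) != CUDA_SUCCESS)
            return 1;
        cuCtxCreate(&ctx, 0, dev);     // make dev current
        cuMemAlloc(&ptr, 1 << 20);     // 1 MiB of device memory

        // Same driver call the patch adds inside cudaQueryAddr
        if (cuDeviceGetPCIBusId(pci_buf, sizeof(pci_buf), dev) == CUDA_SUCCESS)
            printf("GPU 0 PCI bus ID: %s\n", pci_buf); // e.g. "0000:59:00.0"

        cuMemFree(ptr);
        cuCtxDestroy(ctx);
        return 0;
    }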
" pci_bus_id=" + pci_bus_id : ""); nixl_status_t status = rail_manager.registerMemory((void *)mem.addr, mem.len, nixl_mem, mem.devId, + pci_bus_id, priv->rail_mr_list_, priv->rail_key_list_, priv->selected_rails_); diff --git a/src/utils/libfabric/libfabric_rail_manager.cpp b/src/utils/libfabric/libfabric_rail_manager.cpp index 5bf23ae165..7bb23dba22 100644 --- a/src/utils/libfabric/libfabric_rail_manager.cpp +++ b/src/utils/libfabric/libfabric_rail_manager.cpp @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,6 +21,7 @@ #include "libfabric/libfabric_topology.h" #include "common/nixl_log.h" #include "serdes/serdes.h" +#include // Forward declaration for LibfabricUtils namespace namespace LibfabricUtils { @@ -46,6 +47,7 @@ nixlLibfabricRailManager::nixlLibfabricRailManager(size_t striping_threshold) // Get network devices from topology and create rails automatically std::vector all_devices = topology->getAllDevices(); + std::string selected_provider_name = topology->getProviderName(); NIXL_DEBUG << "Got " << all_devices.size() @@ -321,16 +323,25 @@ nixlLibfabricRailManager::prepareAndSubmitTransfer( std::vector nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, nixl_mem_t mem_type, - int gpu_id) const { + int gpu_id, + const std::string &gpu_pci_bus_id) const { if (mem_type == VRAM_SEG) { #ifdef HAVE_CUDA if (gpu_id < 0) { NIXL_ERROR << "Invalid GPU ID " << gpu_id << " for VRAM memory " << mem_addr; return {}; // Return empty vector to indicate failure } - std::vector gpu_efa_devices = topology->getEfaDevicesForGpu(gpu_id); + + // Use PCI bus ID provided by caller (queried in backend layer) + if (gpu_pci_bus_id.empty()) { + NIXL_ERROR << "Empty PCI bus ID provided for VRAM memory " << mem_addr; + return {}; // Return empty vector to indicate failure + } + + // Get EFA devices for this PCI bus ID + std::vector gpu_efa_devices = topology->getEfaDevicesForGPUPci(gpu_pci_bus_id); if (gpu_efa_devices.empty()) { - NIXL_ERROR << "No EFA devices found for GPU " << gpu_id; + NIXL_ERROR << "No EFA devices found for PCI " << gpu_pci_bus_id; return {}; // Return empty vector to indicate failure } std::vector gpu_rails; @@ -340,7 +351,7 @@ nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, // Bounds check: ensure rail index is valid if (it->second < data_rails_.size()) { gpu_rails.push_back(it->second); - NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id + NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU-PCI " << gpu_pci_bus_id << " mapped to rail " << it->second << " (EFA device=" << efa_device << ")"; } else { @@ -348,18 +359,18 @@ nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, << " but only " << data_rails_.size() << " rails available"; } } else { - NIXL_WARN << "EFA device " << efa_device << " not found in rail mapping for GPU " - << gpu_id; + NIXL_WARN << "EFA device " << efa_device + << " not found in rail mapping for GPU-PCI " << gpu_pci_bus_id; } } if (gpu_rails.empty()) { - NIXL_ERROR << "No valid rail mapping found for GPU " << gpu_id << " (checked " - << gpu_efa_devices.size() << " EFA 
devices)"; + NIXL_ERROR << "No valid rail mapping found for GPU-PCI " << gpu_pci_bus_id + << " (checked " << gpu_efa_devices.size() << " EFA devices)"; return {}; } - NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id << " will use " + NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU-PCI " << gpu_pci_bus_id << " will use " << gpu_rails.size() << " rails total"; return gpu_rails; #else @@ -390,6 +401,7 @@ nixlLibfabricRailManager::registerMemory(void *buffer, size_t length, nixl_mem_t mem_type, int gpu_id, + const std::string &gpu_pci_bus_id, std::vector &mr_list_out, std::vector &key_list_out, std::vector &selected_rails_out) { @@ -398,8 +410,11 @@ nixlLibfabricRailManager::registerMemory(void *buffer, return NIXL_ERR_INVALID_PARAM; } - // Use internal rail selection with explicit GPU ID - std::vector selected_rails = selectRailsForMemory(buffer, mem_type, gpu_id); + // Select rails based on memory type and PCI bus ID + // For VRAM: uses PCI bus ID provided by backend to map to topology-aware rails + // For DRAM: uses all available rails + std::vector selected_rails = + selectRailsForMemory(buffer, mem_type, gpu_id, gpu_pci_bus_id); if (selected_rails.empty()) { NIXL_ERROR << "No rails selected for memory type " << mem_type; return NIXL_ERR_NOT_SUPPORTED; @@ -429,6 +444,7 @@ nixlLibfabricRailManager::registerMemory(void *buffer, struct fid_mr *mr; uint64_t key; + // Pass gpu_id parameter to individual rail's registerMemory calls nixl_status_t status = data_rails_[rail_idx]->registerMemory(buffer, length, mem_type, gpu_id, &mr, &key); if (status != NIXL_SUCCESS) { diff --git a/src/utils/libfabric/libfabric_rail_manager.h b/src/utils/libfabric/libfabric_rail_manager.h index 9baca7c18f..b713f30f24 100644 --- a/src/utils/libfabric/libfabric_rail_manager.h +++ b/src/utils/libfabric/libfabric_rail_manager.h @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates. 
diff --git a/src/utils/libfabric/libfabric_topology.cpp b/src/utils/libfabric/libfabric_topology.cpp
index ce86ef3db1..a2c673afcf 100644
--- a/src/utils/libfabric/libfabric_topology.cpp
+++ b/src/utils/libfabric/libfabric_topology.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -135,18 +135,37 @@ nixlLibfabricTopology::discoverEfaDevices() {
 }
 
 std::vector<std::string>
-nixlLibfabricTopology::getEfaDevicesForGpu(int gpu_id) const {
-    auto it = gpu_to_efa_devices.find(gpu_id);
-    if (it != gpu_to_efa_devices.end()) {
-        return it->second;
+nixlLibfabricTopology::getEfaDevicesForGPUPci(const std::string &pci_bus_id) const {
+    // Normalize PCI bus ID format to match hwloc format
+    // CUDA format: "0000:59:00.0" → hwloc format: "0:59:00.0"
+    unsigned int domain, bus, device, function;
+    if (sscanf(pci_bus_id.c_str(), "%x:%x:%x.%x", &domain, &bus, &device, &function) == 4) {
+        char normalized_pci[32];
+        snprintf(normalized_pci,
+                 sizeof(normalized_pci),
+                 "%x:%02x:%02x.%x",
+                 domain,
+                 bus,
+                 device,
+                 function);
+        std::string normalized_id(normalized_pci);
+
+        auto it = pci_to_efa_devices.find(normalized_id);
+        if (it != pci_to_efa_devices.end()) {
+            NIXL_DEBUG << "Found EFA devices for PCI " << pci_bus_id << " (normalized to "
+                       << normalized_id << ")";
+            return it->second;
+        }
+        // PCI ID parsed successfully but not found in mapping
+        NIXL_WARN << "PCI bus ID " << pci_bus_id << " (normalized to " << normalized_id
+                  << ") not found in GPU-EFA mapping, returning all devices";
+    } else {
+        // Failed to parse PCI bus ID format
+        NIXL_WARN << "Failed to parse PCI bus ID format: " << pci_bus_id
+                  << ", returning all devices";
     }
-    NIXL_WARN << "No EFA devices found for GPU " << gpu_id << ", returning all devices";
-    return all_devices;
-}
 
-bool
-nixlLibfabricTopology::isValidGpuId(int gpu_id) const {
-    return gpu_id >= 0 && gpu_id < num_gpus;
+    return all_devices;
 }
 
 bool
@@ -165,10 +184,10 @@ nixlLibfabricTopology::printTopologyInfo() const {
     for (size_t i = 0; i < all_devices.size(); ++i) {
         NIXL_TRACE << "  [" << i << "] " << all_devices[i];
     }
-    NIXL_TRACE << "GPU → EFA mapping:";
-    for (const auto &pair : gpu_to_efa_devices) {
+    NIXL_TRACE << "GPU-PCI → EFA mapping:";
+    for (const auto &pair : pci_to_efa_devices) {
         std::stringstream ss;
-        ss << "  GPU " << pair.first << " → [";
+        ss << "  GPU-PCI " << pair.first << " → [";
         for (size_t i = 0; i < pair.second.size(); ++i) {
             if (i > 0) ss << ", ";
             ss << pair.second[i];
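The normalization above is the subtle part: CUDA zero-pads the PCI domain to four hex digits while the hwloc-derived keys do not. A standalone re-implementation (hypothetical, for offline verification only) shows the round-trip:

    #include <cstdio>
    #include <string>

    // Mirrors the patch's normalization: CUDA-style "0000:59:00.0"
    // becomes hwloc-style "0:59:00.0".
    static std::string normalize(const std::string &cuda_id) {
        unsigned int domain, bus, device, function;
        if (sscanf(cuda_id.c_str(), "%x:%x:%x.%x", &domain, &bus, &device, &function) != 4)
            return ""; // unparsable
        char buf[32];
        snprintf(buf, sizeof(buf), "%x:%02x:%02x.%x", domain, bus, device, function);
        return buf;
    }

    int main() {
        printf("%s\n", normalize("0000:59:00.0").c_str()); // prints 0:59:00.0
        printf("%s\n", normalize("0001:a0:1d.2").c_str()); // prints 1:a0:1d.2
        return 0;
    }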
@@ -423,7 +442,7 @@ nixlLibfabricTopology::buildPcieToLibfabricMapping() {
 
 nixl_status_t
 nixlLibfabricTopology::buildGpuToEfaMapping() {
-    gpu_to_efa_devices.clear();
+    pci_to_efa_devices.clear();
 
     // Implement NIXL's topology-aware GPU-EFA grouping algorithm
     nixl_status_t status = buildTopologyAwareGrouping();
     if (status != NIXL_SUCCESS) {
@@ -431,7 +450,7 @@ nixlLibfabricTopology::buildGpuToEfaMapping() {
         return buildFallbackMapping();
     }
 
-    NIXL_TRACE << "Built GPU→EFA mapping for " << gpu_to_efa_devices.size()
+    NIXL_TRACE << "Built PCI→EFA mapping for " << pci_to_efa_devices.size()
                << " GPUs using topology-aware algorithm";
 
     return NIXL_SUCCESS;
@@ -527,13 +546,17 @@ nixlLibfabricTopology::buildTopologyAwareGrouping() {
             }
 
             if (gpu_index >= 0) {
-                gpu_to_efa_devices[gpu_index] = gpu_efa_devices;
-
-                NIXL_TRACE << "GPU " << gpu_index << " (" << std::hex << group.closest_gpu.domain_id
-                           << ":" << static_cast<int>(group.closest_gpu.bus_id) << ":"
-                           << static_cast<int>(group.closest_gpu.device_id) << "."
-                           << static_cast<int>(group.closest_gpu.function_id) << std::dec << ") → "
-                           << gpu_efa_devices.size() << " EFA devices";
+                // Store mapping using PCI bus ID as key
+                std::string pci_bus_id = getPcieAddressFromHwlocObj(group.closest_gpu.hwloc_node);
+                pci_to_efa_devices[pci_bus_id] = gpu_efa_devices;
+
+                std::stringstream ss;
+                for (size_t i = 0; i < gpu_efa_devices.size(); ++i) {
+                    if (i > 0) ss << ", ";
+                    ss << gpu_efa_devices[i];
+                }
+                NIXL_TRACE << "PCI " << pci_bus_id << " (GPU " << gpu_index << ") → "
+                           << gpu_efa_devices.size() << " EFA devices: [" << ss.str() << "]";
             }
         }
     }
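getPcieAddressFromHwlocObj is an existing helper that this diff does not show. For context, a plausible shape of such a helper under hwloc 2.x is sketched below; the attr->pcidev field accesses are real hwloc API, but everything else is an assumption, not the patch's actual implementation:

    #include <hwloc.h>
    #include <cstdio>
    #include <string>

    // Illustrative sketch only: formats an hwloc PCI device's address.
    // Note the domain is printed without zero-padding, matching the
    // normalized "0:59:00.0" keys used above.
    static std::string pciAddrOf(hwloc_obj_t obj) {
        if (!obj || obj->type != HWLOC_OBJ_PCI_DEVICE)
            return "";
        char buf[32];
        // attr->pcidev carries domain/bus/dev/func for PCI device objects
        snprintf(buf, sizeof(buf), "%x:%02x:%02x.%x",
                 (unsigned)obj->attr->pcidev.domain, (unsigned)obj->attr->pcidev.bus,
                 (unsigned)obj->attr->pcidev.dev, (unsigned)obj->attr->pcidev.func);
        return buf;
    }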
@@ -543,15 +566,12 @@ nixlLibfabricTopology::buildTopologyAwareGrouping() {
 
 nixl_status_t
 nixlLibfabricTopology::buildFallbackMapping() {
     // Fallback: if specific mapping failed, use simple approach
-    gpu_to_efa_devices.clear();
-
-    // Give all devices to all GPUs (not optimal but functional)
-    for (int gpu_id = 0; gpu_id < num_gpus; ++gpu_id) {
-        gpu_to_efa_devices[gpu_id] = all_devices;
-    }
+    // We can't build a PCI-based mapping without topology information, so just return success;
+    // getEfaDevicesForGPUPci() will return all_devices when no mapping is found
+    NIXL_WARN << "Using fallback: all GPUs will use all available EFA devices";
 
     return NIXL_SUCCESS;
 }
-
 // hwloc helper methods
 
 std::string
@@ -607,8 +627,8 @@ nixlLibfabricTopology::groupNicsWithGpus
     // Implement NIXL's topology-aware NIC grouping algorithm
     // Step 1: Mark topology nodes that have NICs in their subtree
-    std::map<hwloc_obj_t, int> node_group_counts;
-    std::map<hwloc_obj_t, std::vector<std::string>> node_nics;
+    std::unordered_map<hwloc_obj_t, int> node_group_counts;
+    std::unordered_map<hwloc_obj_t, std::vector<std::string>> node_nics;
     std::set<hwloc_obj_t> nic_subtree_nodes;
 
     // Mark all nodes that have NICs in their subtree and collect NICs per node
     for (const auto &nic : discovered_nics) {
@@ -621,7 +641,7 @@ nixlLibfabricTopology::groupNicsWithGpus
     }
 
     // Step 2: For each GPU, walk up until finding a NIC subtree node and increment its count
-    std::map<hwloc_obj_t, std::vector<hwloc_obj_t>> node_gpus;
+    std::unordered_map<hwloc_obj_t, std::vector<hwloc_obj_t>> node_gpus;
 
     for (const auto &gpu : discovered_gpus) {
         hwloc_obj_t node = gpu.hwloc_node;
@@ -637,7 +657,7 @@ nixlLibfabricTopology::groupNicsWithGpus
     // Step 3: Collect all NICs that need to be grouped and assign them to ancestor nodes
-    std::map<hwloc_obj_t, std::vector<std::string>> ancestor_nics;
+    std::unordered_map<hwloc_obj_t, std::vector<std::string>> ancestor_nics;
 
     for (const auto &pair : node_nics) {
         hwloc_obj_t nic_node = pair.first;
diff --git a/src/utils/libfabric/libfabric_topology.h b/src/utils/libfabric/libfabric_topology.h
index f85bc74e9a..a6466ba521 100644
--- a/src/utils/libfabric/libfabric_topology.h
+++ b/src/utils/libfabric/libfabric_topology.h
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +21,7 @@
 #include "libfabric_common.h"
 #include "nixl.h"
 #include <hwloc.h>
-#include <map>
+#include <unordered_map>
 
 /**
  * @brief Topology discovery and management for AWS instances with EFA devices
@@ -32,8 +32,8 @@
  */
 class nixlLibfabricTopology {
 private:
-    // GPU to EFA device mapping: GPU 0→[efa0,efa1], GPU 1→[efa2,efa3], etc.
-    std::map<int, std::vector<std::string>> gpu_to_efa_devices;
+    // PCI bus ID (hwloc format) to EFA device mapping: "0:72:00.0"→[efa0,efa1], etc.
+    std::unordered_map<std::string, std::vector<std::string>> pci_to_efa_devices;
 
     // All available network devices discovered on this system
     std::vector<std::string> all_devices;
@@ -53,8 +53,8 @@ class nixlLibfabricTopology {
     hwloc_topology_t hwloc_topology;
 
     // PCIe to Libfabric device mapping
-    std::map<std::string, std::string> pcie_to_libfabric_map;
-    std::map<std::string, std::string> libfabric_to_pcie_map;
+    std::unordered_map<std::string, std::string> pcie_to_libfabric_map;
+    std::unordered_map<std::string, std::string> libfabric_to_pcie_map;
 
     // Helper methods
     nixl_status_t
@@ -127,7 +127,7 @@ class nixlLibfabricTopology {
     // GPU-based queries (main interface)
     std::vector<std::string>
-    getEfaDevicesForGpu(int gpu_id) const;
+    getEfaDevicesForGPUPci(const std::string &pci_bus_id) const;
 
     // System information
     int
@@ -151,8 +151,6 @@ class nixlLibfabricTopology {
         return topology_discovered;
     }
 
-    bool
-    isValidGpuId(int gpu_id) const;
     bool
     isValidDevice(const std::string &efa_device) const;
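A note on the std::map → std::unordered_map switch: hwloc_obj_t is a plain pointer (struct hwloc_obj *), so the default std::hash for pointer types applies and no custom hasher is required. A minimal sanity check (hypothetical, not part of the patch):

    #include <hwloc.h>
    #include <unordered_map>

    int main() {
        hwloc_topology_t topo;
        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);

        // Default std::hash works because hwloc_obj_t is a pointer type
        std::unordered_map<hwloc_obj_t, int> counts;
        counts[hwloc_get_root_obj(topo)] = 1;

        hwloc_topology_destroy(topo);
        return counts.size() == 1 ? 0 : 1;
    }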
diff --git a/test/unit/utils/libfabric/libfabric_topology_test.cpp b/test/unit/utils/libfabric/libfabric_topology_test.cpp
index 1352cf588e..eafc3e2fc5 100644
--- a/test/unit/utils/libfabric/libfabric_topology_test.cpp
+++ b/test/unit/utils/libfabric/libfabric_topology_test.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,14 +44,31 @@ main() {
         NIXL_INFO << "3. Testing GPU-specific queries (detected " << num_gpus << " GPUs)...";
         int test_gpus = std::min(num_gpus, 3); // Test up to 3 GPUs or all available
         for (int gpu_id = 0; gpu_id < test_gpus; ++gpu_id) {
-            auto gpu_devices = topology.getEfaDevicesForGpu(gpu_id);
+#ifdef CUDA_FOUND
+            // Get PCI bus ID for this GPU
+            cudaDeviceProp prop;
+            cudaGetDeviceProperties(&prop, gpu_id);
+
+            char pci_bus_id[32];
+            snprintf(pci_bus_id,
+                     sizeof(pci_bus_id),
+                     "%04x:%02x:%02x.0",
+                     prop.pciDomainID,
+                     prop.pciBusID,
+                     prop.pciDeviceID);
+
+            auto gpu_devices = topology.getEfaDevicesForGPUPci(pci_bus_id);
+
             std::string device_list;
             for (const auto &device : gpu_devices) {
                 if (!device_list.empty()) device_list += " ";
                 device_list += device;
             }
-            NIXL_INFO << "  GPU " << gpu_id << " mapped to " << gpu_devices.size()
-                      << " EFA devices: " << device_list;
+            NIXL_INFO << "  GPU " << gpu_id << " (PCI: " << pci_bus_id << ") mapped to "
+                      << gpu_devices.size() << " EFA devices: " << device_list;
+#else
+            NIXL_INFO << "  Skipping GPU " << gpu_id << " (CUDA not available)";
+#endif
         }
     } else {
         NIXL_INFO << "3. Skipping GPU-specific tests (no GPUs detected)";
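One possible simplification for the test (a suggestion, not what the patch does): the CUDA runtime can format the bus ID directly via cudaDeviceGetPCIBusId, avoiding the hand-rolled snprintf over cudaDeviceProp fields and guaranteeing the same "0000:59:00.0" layout the driver-API path in libfabric_backend.cpp produces:

    #include <cuda_runtime.h>
    #include <cstdio>

    // Suggestion only: let the runtime format the bus ID.
    void printBusId(int gpu_id) {
        char pci_bus_id[32];
        if (cudaDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), gpu_id) == cudaSuccess)
            printf("GPU %d: %s\n", gpu_id, pci_bus_id); // e.g. "0000:59:00.0"
    }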