diff --git a/src/plugins/libfabric/libfabric_backend.cpp b/src/plugins/libfabric/libfabric_backend.cpp
index 5813e88656..c15b3b44f7 100644
--- a/src/plugins/libfabric/libfabric_backend.cpp
+++ b/src/plugins/libfabric/libfabric_backend.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@
 #ifdef HAVE_CUDA
 static int
-cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx) {
+cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx, std::string &pci_bus_id) {
     CUmemorytype mem_type = CU_MEMORYTYPE_HOST;
     uint32_t is_managed = 0;
     CUpointer_attribute attr_type[4];
@@ -75,6 +75,19 @@ cudaQueryAddr(void *address, bool &is_dev, CUdevice &dev, CUcontext &ctx) {
     result = cuPointerGetAttributes(4, attr_type, attr_data, (CUdeviceptr)address);
     is_dev = (mem_type == CU_MEMORYTYPE_DEVICE);
 
+    // Get PCI bus ID if device memory
+    if (result == CUDA_SUCCESS && is_dev) {
+        char pci_buf[32];
+        CUresult pci_result = cuDeviceGetPCIBusId(pci_buf, sizeof(pci_buf), dev);
+        if (pci_result == CUDA_SUCCESS) {
+            pci_bus_id = std::string(pci_buf);
+        } else {
+            pci_bus_id = "";
+        }
+    } else {
+        pci_bus_id = "";
+    }
+
     return (CUDA_SUCCESS != result);
 }
 
@@ -89,6 +102,7 @@ nixlLibfabricCudaCtx::cudaUpdateCtxPtr(void *address, int expected_dev, bool &wa
     bool is_dev;
     CUdevice dev;
     CUcontext ctx;
+    std::string pci_bus_id; // Not used here, but required by cudaQueryAddr
    int ret;
 
     was_updated = false;
@@ -96,7 +110,7 @@ nixlLibfabricCudaCtx::cudaUpdateCtxPtr(void *address, int expected_dev, bool &wa
     if (expected_dev == -1) return -1;
     if (myDevId_ != -1 && expected_dev != myDevId_) return -1;
 
-    ret = cudaQueryAddr(address, is_dev, dev, ctx);
+    ret = cudaQueryAddr(address, is_dev, dev, ctx, pci_bus_id);
     if (ret) return ret;
     if (!is_dev) return 0;
     if (dev != expected_dev) return -1;
@@ -734,6 +748,7 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
     priv->length_ = mem.len;
     priv->gpu_device_id_ = mem.devId; // Store GPU device ID
 
+    std::string pci_bus_id = "";
 #ifdef HAVE_CUDA
     // Handle CUDA memory registration with GPU Direct RDMA support
     if (nixl_mem == VRAM_SEG) {
@@ -760,6 +775,19 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
             }
             NIXL_DEBUG << "Set CUDA device context to GPU " << mem.devId;
         }
+
+        // Query PCI bus ID from memory address (AFTER setting context)
+        bool is_dev;
+        CUdevice dev;
+        CUcontext ctx;
+
+        int ret = cudaQueryAddr((void *)mem.addr, is_dev, dev, ctx, pci_bus_id);
+        if (ret || !is_dev) {
+            NIXL_ERROR << "Failed to query device from memory " << (void *)mem.addr;
+            return NIXL_ERR_BACKEND;
+        }
+
+        NIXL_DEBUG << "Queried PCI bus ID: " << pci_bus_id << " for GPU " << mem.devId;
     }
 #endif
 
@@ -777,12 +805,14 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
     // Use Rail Manager for centralized memory registration with GPU Direct RDMA support
     NIXL_TRACE << "Registering memory: addr=" << (void *)mem.addr << " len=" << mem.len
-               << " mem_type=" << nixl_mem << " devId=" << mem.devId;
+               << " mem_type=" << nixl_mem << " devId=" << mem.devId
+               << (nixl_mem == VRAM_SEG ? " pci_bus_id=" + pci_bus_id : "");
 
     nixl_status_t status = rail_manager.registerMemory((void *)mem.addr,
                                                        mem.len,
                                                        nixl_mem,
                                                        mem.devId,
+                                                       pci_bus_id,
                                                        priv->rail_mr_list_,
                                                        priv->rail_key_list_,
                                                        priv->selected_rails_);
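For reviewers who want to exercise the new out-parameter contract in isolation: the sketch below is hypothetical and not part of the patch; it allocates device memory through the CUDA driver API and retrieves the same PCI bus ID string that cudaQueryAddr now reports. The printed format, e.g. "0000:59:00.0" with a zero-padded domain, is what the normalization in libfabric_topology.cpp expects to receive.

    // Hypothetical standalone probe (not in the patch); error handling abbreviated.
    #include <cuda.h>
    #include <cstdio>

    int main() {
        CUdevice dev;
        CUcontext ctx;
        CUdeviceptr ptr;
        char pci_buf[32];

        if (cuInit(0) != CUDA_SUCCESS || cuDeviceGet(&dev, 0) != CUDA_SUCCESS)
            return 1;
        cuCtxCreate(&ctx, 0, dev);     // make dev current
        cuMemAlloc(&ptr, 1 << 20);     // 1 MiB of device memory

        // Same driver call the patch adds inside cudaQueryAddr
        if (cuDeviceGetPCIBusId(pci_buf, sizeof(pci_buf), dev) == CUDA_SUCCESS)
            printf("GPU 0 PCI bus ID: %s\n", pci_buf); // e.g. "0000:59:00.0"

        cuMemFree(ptr);
        cuCtxDestroy(ctx);
        return 0;
    }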
" pci_bus_id=" + pci_bus_id : ""); nixl_status_t status = rail_manager.registerMemory((void *)mem.addr, mem.len, nixl_mem, mem.devId, + pci_bus_id, priv->rail_mr_list_, priv->rail_key_list_, priv->selected_rails_); diff --git a/src/utils/libfabric/libfabric_rail_manager.cpp b/src/utils/libfabric/libfabric_rail_manager.cpp index 5bf23ae165..7bb23dba22 100644 --- a/src/utils/libfabric/libfabric_rail_manager.cpp +++ b/src/utils/libfabric/libfabric_rail_manager.cpp @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,6 +21,7 @@ #include "libfabric/libfabric_topology.h" #include "common/nixl_log.h" #include "serdes/serdes.h" +#include // Forward declaration for LibfabricUtils namespace namespace LibfabricUtils { @@ -46,6 +47,7 @@ nixlLibfabricRailManager::nixlLibfabricRailManager(size_t striping_threshold) // Get network devices from topology and create rails automatically std::vector all_devices = topology->getAllDevices(); + std::string selected_provider_name = topology->getProviderName(); NIXL_DEBUG << "Got " << all_devices.size() @@ -321,16 +323,25 @@ nixlLibfabricRailManager::prepareAndSubmitTransfer( std::vector nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, nixl_mem_t mem_type, - int gpu_id) const { + int gpu_id, + const std::string &gpu_pci_bus_id) const { if (mem_type == VRAM_SEG) { #ifdef HAVE_CUDA if (gpu_id < 0) { NIXL_ERROR << "Invalid GPU ID " << gpu_id << " for VRAM memory " << mem_addr; return {}; // Return empty vector to indicate failure } - std::vector gpu_efa_devices = topology->getEfaDevicesForGpu(gpu_id); + + // Use PCI bus ID provided by caller (queried in backend layer) + if (gpu_pci_bus_id.empty()) { + NIXL_ERROR << "Empty PCI bus ID provided for VRAM memory " << mem_addr; + return {}; // Return empty vector to indicate failure + } + + // Get EFA devices for this PCI bus ID + std::vector gpu_efa_devices = topology->getEfaDevicesForGPUPci(gpu_pci_bus_id); if (gpu_efa_devices.empty()) { - NIXL_ERROR << "No EFA devices found for GPU " << gpu_id; + NIXL_ERROR << "No EFA devices found for PCI " << gpu_pci_bus_id; return {}; // Return empty vector to indicate failure } std::vector gpu_rails; @@ -340,7 +351,7 @@ nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, // Bounds check: ensure rail index is valid if (it->second < data_rails_.size()) { gpu_rails.push_back(it->second); - NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id + NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU-PCI " << gpu_pci_bus_id << " mapped to rail " << it->second << " (EFA device=" << efa_device << ")"; } else { @@ -348,18 +359,18 @@ nixlLibfabricRailManager::selectRailsForMemory(void *mem_addr, << " but only " << data_rails_.size() << " rails available"; } } else { - NIXL_WARN << "EFA device " << efa_device << " not found in rail mapping for GPU " - << gpu_id; + NIXL_WARN << "EFA device " << efa_device + << " not found in rail mapping for GPU-PCI " << gpu_pci_bus_id; } } if (gpu_rails.empty()) { - NIXL_ERROR << "No valid rail mapping found for GPU " << gpu_id << " (checked " - << gpu_efa_devices.size() << " EFA 
devices)"; + NIXL_ERROR << "No valid rail mapping found for GPU-PCI " << gpu_pci_bus_id + << " (checked " << gpu_efa_devices.size() << " EFA devices)"; return {}; } - NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU " << gpu_id << " will use " + NIXL_DEBUG << "VRAM memory " << mem_addr << " on GPU-PCI " << gpu_pci_bus_id << " will use " << gpu_rails.size() << " rails total"; return gpu_rails; #else @@ -390,6 +401,7 @@ nixlLibfabricRailManager::registerMemory(void *buffer, size_t length, nixl_mem_t mem_type, int gpu_id, + const std::string &gpu_pci_bus_id, std::vector &mr_list_out, std::vector &key_list_out, std::vector &selected_rails_out) { @@ -398,8 +410,11 @@ nixlLibfabricRailManager::registerMemory(void *buffer, return NIXL_ERR_INVALID_PARAM; } - // Use internal rail selection with explicit GPU ID - std::vector selected_rails = selectRailsForMemory(buffer, mem_type, gpu_id); + // Select rails based on memory type and PCI bus ID + // For VRAM: uses PCI bus ID provided by backend to map to topology-aware rails + // For DRAM: uses all available rails + std::vector selected_rails = + selectRailsForMemory(buffer, mem_type, gpu_id, gpu_pci_bus_id); if (selected_rails.empty()) { NIXL_ERROR << "No rails selected for memory type " << mem_type; return NIXL_ERR_NOT_SUPPORTED; @@ -429,6 +444,7 @@ nixlLibfabricRailManager::registerMemory(void *buffer, struct fid_mr *mr; uint64_t key; + // Pass gpu_id parameter to individual rail's registerMemory calls nixl_status_t status = data_rails_[rail_idx]->registerMemory(buffer, length, mem_type, gpu_id, &mr, &key); if (status != NIXL_SUCCESS) { diff --git a/src/utils/libfabric/libfabric_rail_manager.h b/src/utils/libfabric/libfabric_rail_manager.h index 9baca7c18f..b713f30f24 100644 --- a/src/utils/libfabric/libfabric_rail_manager.h +++ b/src/utils/libfabric/libfabric_rail_manager.h @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates. 
diff --git a/src/utils/libfabric/libfabric_topology.cpp b/src/utils/libfabric/libfabric_topology.cpp
index ce86ef3db1..a2c673afcf 100644
--- a/src/utils/libfabric/libfabric_topology.cpp
+++ b/src/utils/libfabric/libfabric_topology.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -135,18 +135,37 @@ nixlLibfabricTopology::discoverEfaDevices() {
 }
 
 std::vector<std::string>
-nixlLibfabricTopology::getEfaDevicesForGpu(int gpu_id) const {
-    auto it = gpu_to_efa_devices.find(gpu_id);
-    if (it != gpu_to_efa_devices.end()) {
-        return it->second;
+nixlLibfabricTopology::getEfaDevicesForGPUPci(const std::string &pci_bus_id) const {
+    // Normalize PCI bus ID format to match hwloc format
+    // CUDA format: "0000:59:00.0" → hwloc format: "0:59:00.0"
+    unsigned int domain, bus, device, function;
+    if (sscanf(pci_bus_id.c_str(), "%x:%x:%x.%x", &domain, &bus, &device, &function) == 4) {
+        char normalized_pci[32];
+        snprintf(normalized_pci,
+                 sizeof(normalized_pci),
+                 "%x:%02x:%02x.%x",
+                 domain,
+                 bus,
+                 device,
+                 function);
+        std::string normalized_id(normalized_pci);
+
+        auto it = pci_to_efa_devices.find(normalized_id);
+        if (it != pci_to_efa_devices.end()) {
+            NIXL_DEBUG << "Found EFA devices for PCI " << pci_bus_id << " (normalized to "
+                       << normalized_id << ")";
+            return it->second;
+        }
+        // PCI ID parsed successfully but not found in mapping
+        NIXL_WARN << "PCI bus ID " << pci_bus_id << " (normalized to " << normalized_id
+                  << ") not found in GPU-EFA mapping, returning all devices";
+    } else {
+        // Failed to parse PCI bus ID format
+        NIXL_WARN << "Failed to parse PCI bus ID format: " << pci_bus_id
+                  << ", returning all devices";
     }
-    NIXL_WARN << "No EFA devices found for GPU " << gpu_id << ", returning all devices";
-    return all_devices;
-}
 
-bool
-nixlLibfabricTopology::isValidGpuId(int gpu_id) const {
-    return gpu_id >= 0 && gpu_id < num_gpus;
+    return all_devices;
 }
 
 bool
@@ -165,10 +184,10 @@ nixlLibfabricTopology::printTopologyInfo() const {
     for (size_t i = 0; i < all_devices.size(); ++i) {
         NIXL_TRACE << "  [" << i << "] " << all_devices[i];
     }
-    NIXL_TRACE << "GPU → EFA mapping:";
-    for (const auto &pair : gpu_to_efa_devices) {
+    NIXL_TRACE << "GPU-PCI → EFA mapping:";
+    for (const auto &pair : pci_to_efa_devices) {
         std::stringstream ss;
-        ss << "  GPU " << pair.first << " → [";
+        ss << "  GPU-PCI " << pair.first << " → [";
         for (size_t i = 0; i < pair.second.size(); ++i) {
             if (i > 0) ss << ", ";
             ss << pair.second[i];
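The normalization above is the subtle part: CUDA zero-pads the PCI domain to four hex digits while the hwloc-derived keys do not. A standalone re-implementation (hypothetical, for offline verification only) shows the round-trip:

    #include <cstdio>
    #include <string>

    // Mirrors the patch's normalization: CUDA-style "0000:59:00.0"
    // becomes hwloc-style "0:59:00.0".
    static std::string normalize(const std::string &cuda_id) {
        unsigned int domain, bus, device, function;
        if (sscanf(cuda_id.c_str(), "%x:%x:%x.%x", &domain, &bus, &device, &function) != 4)
            return ""; // unparsable
        char buf[32];
        snprintf(buf, sizeof(buf), "%x:%02x:%02x.%x", domain, bus, device, function);
        return buf;
    }

    int main() {
        printf("%s\n", normalize("0000:59:00.0").c_str()); // prints 0:59:00.0
        printf("%s\n", normalize("0001:a0:1d.2").c_str()); // prints 1:a0:1d.2
        return 0;
    }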
@@ -423,7 +442,7 @@ nixlLibfabricTopology::buildPcieToLibfabricMapping() {
 
 nixl_status_t
 nixlLibfabricTopology::buildGpuToEfaMapping() {
-    gpu_to_efa_devices.clear();
+    pci_to_efa_devices.clear();
 
     // Implement NIXL's topology-aware GPU-EFA grouping algorithm
     nixl_status_t status = buildTopologyAwareGrouping();
     if (status != NIXL_SUCCESS) {
@@ -431,7 +450,7 @@ nixlLibfabricTopology::buildGpuToEfaMapping() {
         return buildFallbackMapping();
     }
 
-    NIXL_TRACE << "Built GPU→EFA mapping for " << gpu_to_efa_devices.size()
+    NIXL_TRACE << "Built PCI→EFA mapping for " << pci_to_efa_devices.size()
                << " GPUs using topology-aware algorithm";
 
     return NIXL_SUCCESS;
@@ -527,13 +546,17 @@ nixlLibfabricTopology::buildTopologyAwareGrouping() {
             }
 
             if (gpu_index >= 0) {
-                gpu_to_efa_devices[gpu_index] = gpu_efa_devices;
-
-                NIXL_TRACE << "GPU " << gpu_index << " (" << std::hex << group.closest_gpu.domain_id
-                           << ":" << static_cast<int>(group.closest_gpu.bus_id) << ":"
-                           << static_cast<int>(group.closest_gpu.device_id) << "."
-                           << static_cast<int>(group.closest_gpu.function_id) << std::dec << ") → "
-                           << gpu_efa_devices.size() << " EFA devices";
+                // Store mapping using PCI bus ID as key
+                std::string pci_bus_id = getPcieAddressFromHwlocObj(group.closest_gpu.hwloc_node);
+                pci_to_efa_devices[pci_bus_id] = gpu_efa_devices;
+
+                std::stringstream ss;
+                for (size_t i = 0; i < gpu_efa_devices.size(); ++i) {
+                    if (i > 0) ss << ", ";
+                    ss << gpu_efa_devices[i];
+                }
+                NIXL_TRACE << "PCI " << pci_bus_id << " (GPU " << gpu_index << ") → "
+                           << gpu_efa_devices.size() << " EFA devices: [" << ss.str() << "]";
             }
         }
     }
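getPcieAddressFromHwlocObj is an existing helper that this diff does not show. For context, a plausible shape of such a helper under hwloc 2.x is sketched below; the attr->pcidev field accesses are real hwloc API, but everything else is an assumption, not the patch's actual implementation:

    #include <hwloc.h>
    #include <cstdio>
    #include <string>

    // Illustrative sketch only: formats an hwloc PCI device's address.
    // Note the domain is printed without zero-padding, matching the
    // normalized "0:59:00.0" keys used above.
    static std::string pciAddrOf(hwloc_obj_t obj) {
        if (!obj || obj->type != HWLOC_OBJ_PCI_DEVICE)
            return "";
        char buf[32];
        // attr->pcidev carries domain/bus/dev/func for PCI device objects
        snprintf(buf, sizeof(buf), "%x:%02x:%02x.%x",
                 (unsigned)obj->attr->pcidev.domain, (unsigned)obj->attr->pcidev.bus,
                 (unsigned)obj->attr->pcidev.dev, (unsigned)obj->attr->pcidev.func);
        return buf;
    }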
@@ -543,15 +566,12 @@ nixlLibfabricTopology::buildTopologyAwareGrouping() {
 
 nixl_status_t
 nixlLibfabricTopology::buildFallbackMapping() {
     // Fallback: if specific mapping failed, use simple approach
-    gpu_to_efa_devices.clear();
-
-    // Give all devices to all GPUs (not optimal but functional)
-    for (int gpu_id = 0; gpu_id < num_gpus; ++gpu_id) {
-        gpu_to_efa_devices[gpu_id] = all_devices;
-    }
+    // We can't build a PCI-based mapping without topology information, so just return success;
+    // getEfaDevicesForGPUPci() will return all_devices when no mapping is found
+    NIXL_WARN << "Using fallback: all GPUs will use all available EFA devices";
 
     return NIXL_SUCCESS;
 }
-
 // hwloc helper methods
 
 std::string
@@ -607,8 +627,8 @@ nixlLibfabricTopology::groupNicsWithGpus
     // Implement NIXL's topology-aware NIC grouping algorithm
     // Step 1: Mark topology nodes that have NICs in their subtree
-    std::map<hwloc_obj_t, int> node_group_counts;
-    std::map<hwloc_obj_t, std::vector<std::string>> node_nics;
+    std::unordered_map<hwloc_obj_t, int> node_group_counts;
+    std::unordered_map<hwloc_obj_t, std::vector<std::string>> node_nics;
     std::set<hwloc_obj_t> nic_subtree_nodes;
 
     // Mark all nodes that have NICs in their subtree and collect NICs per node
     for (const auto &nic : discovered_nics) {
@@ -621,7 +641,7 @@ nixlLibfabricTopology::groupNicsWithGpus
     }
 
     // Step 2: For each GPU, walk up until finding a NIC subtree node and increment its count
-    std::map<hwloc_obj_t, std::vector<hwloc_obj_t>> node_gpus;
+    std::unordered_map<hwloc_obj_t, std::vector<hwloc_obj_t>> node_gpus;
 
     for (const auto &gpu : discovered_gpus) {
         hwloc_obj_t node = gpu.hwloc_node;
@@ -637,7 +657,7 @@ nixlLibfabricTopology::groupNicsWithGpus
     // Step 3: Collect all NICs that need to be grouped and assign them to ancestor nodes
-    std::map<hwloc_obj_t, std::vector<std::string>> ancestor_nics;
+    std::unordered_map<hwloc_obj_t, std::vector<std::string>> ancestor_nics;
 
     for (const auto &pair : node_nics) {
         hwloc_obj_t nic_node = pair.first;
diff --git a/src/utils/libfabric/libfabric_topology.h b/src/utils/libfabric/libfabric_topology.h
index f85bc74e9a..a6466ba521 100644
--- a/src/utils/libfabric/libfabric_topology.h
+++ b/src/utils/libfabric/libfabric_topology.h
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +21,7 @@
 #include "libfabric_common.h"
 #include "nixl.h"
 #include <hwloc.h>
-#include <map>
+#include <unordered_map>
 
 /**
  * @brief Topology discovery and management for AWS instances with EFA devices
@@ -32,8 +32,8 @@
  */
 class nixlLibfabricTopology {
 private:
-    // GPU to EFA device mapping: GPU 0→[efa0,efa1], GPU 1→[efa2,efa3], etc.
-    std::map<int, std::vector<std::string>> gpu_to_efa_devices;
+    // PCI bus ID (hwloc format) to EFA device mapping: "0:72:00.0"→[efa0,efa1], etc.
+    std::unordered_map<std::string, std::vector<std::string>> pci_to_efa_devices;
 
     // All available network devices discovered on this system
     std::vector<std::string> all_devices;
@@ -53,8 +53,8 @@ class nixlLibfabricTopology {
     hwloc_topology_t hwloc_topology;
 
     // PCIe to Libfabric device mapping
-    std::map<std::string, std::string> pcie_to_libfabric_map;
-    std::map<std::string, std::string> libfabric_to_pcie_map;
+    std::unordered_map<std::string, std::string> pcie_to_libfabric_map;
+    std::unordered_map<std::string, std::string> libfabric_to_pcie_map;
 
     // Helper methods
     nixl_status_t
@@ -127,7 +127,7 @@ class nixlLibfabricTopology {
     // GPU-based queries (main interface)
     std::vector<std::string>
-    getEfaDevicesForGpu(int gpu_id) const;
+    getEfaDevicesForGPUPci(const std::string &pci_bus_id) const;
 
     // System information
     int
@@ -151,8 +151,6 @@ class nixlLibfabricTopology {
         return topology_discovered;
     }
 
-    bool
-    isValidGpuId(int gpu_id) const;
     bool
     isValidDevice(const std::string &efa_device) const;
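A note on the std::map → std::unordered_map switch: hwloc_obj_t is a plain pointer (struct hwloc_obj *), so the default std::hash for pointer types applies and no custom hasher is required. A minimal sanity check (hypothetical, not part of the patch):

    #include <hwloc.h>
    #include <unordered_map>

    int main() {
        hwloc_topology_t topo;
        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);

        // Default std::hash works because hwloc_obj_t is a pointer type
        std::unordered_map<hwloc_obj_t, int> counts;
        counts[hwloc_get_root_obj(topo)] = 1;

        hwloc_topology_destroy(topo);
        return counts.size() == 1 ? 0 : 1;
    }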
diff --git a/test/unit/utils/libfabric/libfabric_topology_test.cpp b/test/unit/utils/libfabric/libfabric_topology_test.cpp
index 1352cf588e..eafc3e2fc5 100644
--- a/test/unit/utils/libfabric/libfabric_topology_test.cpp
+++ b/test/unit/utils/libfabric/libfabric_topology_test.cpp
@@ -1,6 +1,6 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-FileCopyrightText: Copyright (c) 2025 Amazon.com, Inc. and affiliates.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 Amazon.com, Inc. and affiliates.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,14 +44,31 @@ main() {
         NIXL_INFO << "3. Testing GPU-specific queries (detected " << num_gpus << " GPUs)...";
         int test_gpus = std::min(num_gpus, 3); // Test up to 3 GPUs or all available
         for (int gpu_id = 0; gpu_id < test_gpus; ++gpu_id) {
-            auto gpu_devices = topology.getEfaDevicesForGpu(gpu_id);
+#ifdef CUDA_FOUND
+            // Get PCI bus ID for this GPU
+            cudaDeviceProp prop;
+            cudaGetDeviceProperties(&prop, gpu_id);
+
+            char pci_bus_id[32];
+            snprintf(pci_bus_id,
+                     sizeof(pci_bus_id),
+                     "%04x:%02x:%02x.0",
+                     prop.pciDomainID,
+                     prop.pciBusID,
+                     prop.pciDeviceID);
+
+            auto gpu_devices = topology.getEfaDevicesForGPUPci(pci_bus_id);
+
             std::string device_list;
             for (const auto &device : gpu_devices) {
                 if (!device_list.empty()) device_list += " ";
                 device_list += device;
             }
-            NIXL_INFO << "  GPU " << gpu_id << " mapped to " << gpu_devices.size()
-                      << " EFA devices: " << device_list;
+            NIXL_INFO << "  GPU " << gpu_id << " (PCI: " << pci_bus_id << ") mapped to "
+                      << gpu_devices.size() << " EFA devices: " << device_list;
+#else
+            NIXL_INFO << "  Skipping GPU " << gpu_id << " (CUDA not available)";
+#endif
         }
     } else {
         NIXL_INFO << "3. Skipping GPU-specific tests (no GPUs detected)";
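One possible simplification for the test (a suggestion, not what the patch does): the CUDA runtime can format the bus ID directly via cudaDeviceGetPCIBusId, avoiding the hand-rolled snprintf over cudaDeviceProp fields and guaranteeing the same "0000:59:00.0" layout the driver-API path in libfabric_backend.cpp produces:

    #include <cuda_runtime.h>
    #include <cstdio>

    // Suggestion only: let the runtime format the bus ID.
    void printBusId(int gpu_id) {
        char pci_bus_id[32];
        if (cudaDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), gpu_id) == cudaSuccess)
            printf("GPU %d: %s\n", gpu_id, pci_bus_id); // e.g. "0000:59:00.0"
    }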