diff --git a/.gitlab/test_cpp.sh b/.gitlab/test_cpp.sh index d119cbd0bb..074efb0891 100755 --- a/.gitlab/test_cpp.sh +++ b/.gitlab/test_cpp.sh @@ -1,5 +1,5 @@ #!/bin/sh -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/.gitlab/test_nixlbench.sh b/.gitlab/test_nixlbench.sh index 575602726f..116a6e666c 100755 --- a/.gitlab/test_nixlbench.sh +++ b/.gitlab/test_nixlbench.sh @@ -1,5 +1,5 @@ #!/bin/sh -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/.gitlab/test_python.sh b/.gitlab/test_python.sh index 3aec4c2795..fe51cff457 100755 --- a/.gitlab/test_python.sh +++ b/.gitlab/test_python.sh @@ -1,5 +1,5 @@ #!/bin/sh -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmark/nixlbench/src/runtime/etcd/etcd_rt.cpp b/benchmark/nixlbench/src/runtime/etcd/etcd_rt.cpp index 9a840954d5..872c44e07f 100644 --- a/benchmark/nixlbench/src/runtime/etcd/etcd_rt.cpp +++ b/benchmark/nixlbench/src/runtime/etcd/etcd_rt.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/cpp/nixl_etcd_example.cpp b/examples/cpp/nixl_etcd_example.cpp index 07f383a2b7..db04edd2f2 100644 --- a/examples/cpp/nixl_etcd_example.cpp +++ b/examples/cpp/nixl_etcd_example.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/cpp/telemetry_reader.cpp b/examples/cpp/telemetry_reader.cpp index facad1f668..344ce4724d 100644 --- a/examples/cpp/telemetry_reader.cpp +++ b/examples/cpp/telemetry_reader.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/python/remote_storage_example/nixl_p2p_storage_example.py b/examples/python/remote_storage_example/nixl_p2p_storage_example.py index a0b19c71de..01366d3468 100644 --- a/examples/python/remote_storage_example/nixl_p2p_storage_example.py +++ b/examples/python/remote_storage_example/nixl_p2p_storage_example.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/python/telemetry_reader.py b/examples/python/telemetry_reader.py index 150897bf67..e730b861bd 100755 --- a/examples/python/telemetry_reader.py +++ b/examples/python/telemetry_reader.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/plugins/gpunetio/gpunetio_backend.cpp b/src/plugins/gpunetio/gpunetio_backend.cpp index d0e44138bc..901313c1f1 100644 --- a/src/plugins/gpunetio/gpunetio_backend.cpp +++ b/src/plugins/gpunetio/gpunetio_backend.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,15 +17,18 @@ #include "gpunetio_backend.h" #include "serdes/serdes.h" -#include #include #include +#include +#include #include #include "common/nixl_log.h" #include const char info_delimiter = '-'; +extern "C" int +doca_query_mkey_swapped(); /**************************************** * Constructor/Destructor *****************************************/ @@ -47,6 +50,21 @@ nixlDocaEngine::nixlDocaEngine(const nixlBackendInitParams *init_params) result = doca_log_backend_set_sdk_level(sdk_log, DOCA_LOG_LEVEL_ERROR); if (result != DOCA_SUCCESS) throw std::invalid_argument("Can't initialize doca log"); + // Detect device-side expectation for key swapping, allow env override + int device_mkey_swapped = -1; + device_mkey_swapped = doca_query_mkey_swapped(); + if (device_mkey_swapped >= 0) swap_keys_config = (device_mkey_swapped == 1); + const char *env_swap = std::getenv("NIXL_GPUNETIO_SWAP_KEYS"); + if (env_swap != nullptr) { + if (std::string(env_swap) == "0" || std::string(env_swap) == "false") + swap_keys_config = false; + else + swap_keys_config = true; + } + NIXL_DEBUG << "GPUNETIO key byte-order swap (htonl): " + << (swap_keys_config ? "enabled" : "disabled") << ", device expects swapped: " + << (device_mkey_swapped >= 0 ? device_mkey_swapped : -1); + NIXL_INFO << "DOCA network devices "; // Temporary: will extend to more GPUs in a dedicated PR if (custom_params->count("network_devices") > 1) @@ -179,6 +197,15 @@ nixlDocaEngine::nixlDocaEngine(const nixlBackendInitParams *init_params) } else { doca_devinfo_get_ipv4_addr( doca_dev_as_devinfo(ddev), (uint8_t *)ipv4_addr, DOCA_DEVINFO_IPV4_ADDR_SIZE); + if (const char *override_ip = std::getenv("NIXL_DOCASIM_IPV4_OVERRIDE")) { + struct in_addr addr_override = {}; + if (inet_pton(AF_INET, override_ip, &addr_override) == 1) { + std::memcpy(ipv4_addr, &addr_override.s_addr, sizeof(ipv4_addr)); + NIXL_INFO << "DOCA IP override applied from env " << override_ip; + } else { + NIXL_WARN << "Invalid IPv4 override in NIXL_DOCASIM_IPV4_OVERRIDE: " << override_ip; + } + } NIXL_DEBUG << "DOCA IP address " << static_cast(ipv4_addr[0]) << " " << static_cast(ipv4_addr[1]) << " " << static_cast(ipv4_addr[2]) << " " @@ -448,7 +475,10 @@ nixlDocaEngine::nixlDocaInitNotif(const std::string &remote_agent, doca_dev *dev // Ensure notif list is not added twice for the same peer notifMap[remote_agent] = notif; ((volatile struct docaNotif *)notif_fill_cpu)->msg_buf = (uintptr_t)notif->recv_addr; - ((volatile struct docaNotif *)notif_fill_cpu)->msg_lkey = notif->recv_mr->get_lkey(); + ((volatile struct docaNotif *)notif_fill_cpu)->msg_lkey = + swap_keys_config ? htonl(notif->recv_mr->get_lkey()) : notif->recv_mr->get_lkey(); + ((volatile struct docaNotif *)notif_fill_cpu)->keys_are_swapped = swap_keys_config ? 1 : 0; + ((volatile struct docaNotif *)notif_fill_cpu)->msg_size = notif->elems_size; std::atomic_thread_fence(std::memory_order_seq_cst); ((volatile struct docaNotif *)notif_fill_cpu)->qp_gpu = @@ -513,8 +543,10 @@ nixlDocaEngine::progressThreadStart() { /* Set port and IP: */ server_addr.sin_family = AF_INET; server_addr.sin_port = htons(DOCA_RDMA_CM_LOCAL_PORT_SERVER); - server_addr.sin_addr.s_addr = INADDR_ANY; /* listen on any interface */ - + // server_addr.sin_addr.s_addr = INADDR_ANY; + /* listen on any interface */ + std::memcpy(&server_addr.sin_addr.s_addr, ipv4_addr, sizeof(ipv4_addr)); + if (server_addr.sin_addr.s_addr == 0) server_addr.sin_addr.s_addr = INADDR_ANY; /* Bind to the set port and IP: */ if (bind(oob_sock_server, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) { NIXL_ERROR << "Couldn't bind to the port " << DOCA_RDMA_CM_LOCAL_PORT_SERVER; @@ -929,6 +961,8 @@ nixlDocaEngine::getConnInfo(std::string &str) const { ss << (int)ipv4_addr[0] << "." << (int)ipv4_addr[1] << "." << (int)ipv4_addr[2] << "." << (int)ipv4_addr[3]; str = ss.str(); + NIXL_DEBUG << "getConnInfo DOCA: " << str; + return NIXL_SUCCESS; } @@ -1025,6 +1059,16 @@ nixlDocaEngine::registerMem(const nixlBlobDesc &mem, << info_delimiter << ((size_t)priv->mr->get_tot_size()); priv->remoteMrStr = ss.str(); + uint32_t lkey = priv->mr->get_lkey(); + uint32_t rkey = priv->mr->get_rkey(); + NIXL_INFO << "GPUNETIO registerMem publish dev " << priv->devId << " addr " << std::showbase + << std::hex << std::uppercase << (uintptr_t)priv->mr->get_addr() << " len " + << (uint64_t)priv->mr->get_tot_size() << " lkey " << lkey << " rkey " << rkey + << std::noshowbase << std::dec; + NIXL_DEBUG << "[dbg] publish raw: dev=" << priv->devId + << " addr_dec=" << (uintptr_t)priv->mr->get_addr() + << " len_dec=" << (uint64_t)priv->mr->get_tot_size() << " lkey_dec=" << lkey + << " rkey_dec=" << rkey; out = (nixlBackendMD *)priv; return NIXL_SUCCESS; @@ -1043,6 +1087,8 @@ nixl_status_t nixlDocaEngine::getPublicData(const nixlBackendMD *meta, std::string &str) const { const nixlDocaPrivateMetadata *priv = (nixlDocaPrivateMetadata *)meta; str = priv->remoteMrStr; + NIXL_TRACE << "[dbg] getPublicData remoteMrStr=" << str; + return NIXL_SUCCESS; } @@ -1071,10 +1117,27 @@ nixlDocaEngine::loadRemoteMD(const nixlBlobDesc &input, std::stringstream ss(input.metaInfo.data()); while (std::getline(ss, token, info_delimiter)) tokens.push_back(token); - - uint32_t rkey = static_cast(atoi(tokens[0].c_str())); - uintptr_t addr = static_cast(atol(tokens[1].c_str())); - size_t tot_size = static_cast(atol(tokens[2].c_str())); + // Parse as unsigned to avoid overflow/truncation (rkeys often exceed INT_MAX) + NIXL_TRACE << "[dbg] loadRemoteMD tokens size=" << tokens.size(); + for (size_t i = 0; i < tokens.size(); ++i) + NIXL_TRACE << "[dbg] token[" << i << "]=" << tokens[i]; + const char *p0 = tokens[0].c_str(); + const char *p1 = tokens[1].c_str(); + const char *p2 = tokens[2].c_str(); + errno = 0; + unsigned long rkey_ul = strtoul(p0, nullptr, 10); + unsigned long long addr_ull = strtoull(p1, nullptr, 10); + unsigned long long size_ull = strtoull(p2, nullptr, 10); + if (errno != 0) { + NIXL_ERROR << "Failed to parse remote metadata (errno=" << errno << ")"; + return NIXL_ERR_INVALID_PARAM; + } + uint32_t rkey = static_cast(rkey_ul); + uintptr_t addr = static_cast(addr_ull); + size_t tot_size = static_cast(size_ull); + NIXL_TRACE << "[dbg] parsed remote MD rkey=" << std::showbase << std::hex << std::uppercase + << rkey << " addr=" << addr << " size=" << (uint64_t)tot_size << std::noshowbase + << std::dec; // Empty mmap, filled with imported data try { @@ -1145,6 +1208,8 @@ nixlDocaEngine::prepXfer(const nixl_xfer_op_t &operation, pos = treq->start_pos; do { + xferReqRingCpu[pos].keys_are_swapped = swap_keys_config ? 1 : 0; + for (uint32_t idx = 0; idx < lcnt && idx < DOCA_XFER_REQ_SIZE; idx++) { size_t lsize = local[idx].len; size_t rsize = remote[idx].len; @@ -1153,12 +1218,31 @@ nixlDocaEngine::prepXfer(const nixl_xfer_op_t &operation, lmd = (nixlDocaPrivateMetadata *)local[idx].metadataP; rmd = (nixlDocaPublicMetadata *)remote[idx].metadataP; - xferReqRingCpu[pos].lbuf[idx] = (uintptr_t)lmd->mr->get_addr(); - xferReqRingCpu[pos].lkey[idx] = (uintptr_t)lmd->mr->get_lkey(); - xferReqRingCpu[pos].rbuf[idx] = (uintptr_t)rmd->mr->get_addr(); - xferReqRingCpu[pos].rkey[idx] = (uintptr_t)rmd->mr->get_rkey(); + uint32_t lkey_host = lmd->mr->get_lkey(); + uint32_t rkey_host = rmd->mr->get_rkey(); + uint32_t lkey_be = swap_keys_config ? htonl(lkey_host) : lkey_host; + uint32_t rkey_be = swap_keys_config ? htonl(rkey_host) : rkey_host; + + // Local buffer: use the descriptor address (within the local MR) + xferReqRingCpu[pos].lbuf[idx] = (uintptr_t)local[idx].addr; + xferReqRingCpu[pos].lkey[idx] = lkey_be; + uintptr_t dbg_rbuf_desc = (uintptr_t)remote[idx].addr; + uintptr_t dbg_rbuf_mr = (uintptr_t)rmd->mr->get_addr(); + // Use the published remote MR address (not the initiator-side descriptor address) + xferReqRingCpu[pos].rbuf[idx] = dbg_rbuf_mr; + xferReqRingCpu[pos].rkey[idx] = rkey_be; xferReqRingCpu[pos].size[idx] = lsize; xferReqRingCpu[pos].num++; + + NIXL_INFO << "GPUNETIO prepXfer queue_pos " << pos << " idx " << idx << " laddr " + << std::showbase << std::hex << std::uppercase + << xferReqRingCpu[pos].lbuf[idx] << " lkey " << lkey_host << " lkey_be " + << lkey_be << " raddr " << xferReqRingCpu[pos].rbuf[idx] << " rkey " + << rkey_host << " rkey_be " << rkey_be << " size " + << (uint64_t)xferReqRingCpu[pos].size[idx] << std::noshowbase << std::dec; + NIXL_TRACE << "[dbg] remote_desc_addr=" << std::showbase << std::hex << std::uppercase + << dbg_rbuf_desc << " remote_mr_addr=" << dbg_rbuf_mr << " used=mr_addr" + << std::noshowbase << std::dec; } xferReqRingCpu[pos].last_rsvd = last_rsvd_flags; @@ -1199,7 +1283,9 @@ nixlDocaEngine::prepXfer(const nixl_xfer_op_t &operation, (notif->send_pi.fetch_add(1) & (notif->elems_num - 1)); xferReqRingCpu[treq->end_pos - 1].msg_sz = newMsg.size(); xferReqRingCpu[treq->end_pos - 1].lbuf_notif = notif_addr; - xferReqRingCpu[treq->end_pos - 1].lkey_notif = notif->send_mr->get_lkey(); + uint32_t notif_lkey_host = notif->send_mr->get_lkey(); + uint32_t notif_lkey_be = swap_keys_config ? htonl(notif_lkey_host) : notif_lkey_host; + xferReqRingCpu[treq->end_pos - 1].lkey_notif = notif_lkey_be; memcpy((void *)notif_addr, newMsg.c_str(), newMsg.size()); @@ -1378,7 +1464,10 @@ nixlDocaEngine::genNotif(const std::string &remote_agent, const std::string &msg std::lock_guard lock(notifSendLock); ((volatile struct docaNotif *)notif_send_cpu)->msg_buf = msg_buf; - ((volatile struct docaNotif *)notif_send_cpu)->msg_lkey = notif->send_mr->get_lkey(); + ((volatile struct docaNotif *)notif_send_cpu)->msg_lkey = + swap_keys_config ? htonl(notif->send_mr->get_lkey()) : notif->send_mr->get_lkey(); + ((volatile struct docaNotif *)notif_send_cpu)->keys_are_swapped = swap_keys_config ? 1 : 0; + ((volatile struct docaNotif *)notif_send_cpu)->msg_size = newMsg.size(); std::atomic_thread_fence(std::memory_order_seq_cst); ((volatile struct docaNotif *)notif_send_cpu)->qp_gpu = diff --git a/src/plugins/gpunetio/gpunetio_backend.h b/src/plugins/gpunetio/gpunetio_backend.h index 9a17efc9f2..915e3c646d 100644 --- a/src/plugins/gpunetio/gpunetio_backend.h +++ b/src/plugins/gpunetio/gpunetio_backend.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -185,6 +185,10 @@ class nixlDocaEngine : public nixlBackendEngine { std::unordered_map connMap; std::unordered_map notifMap; + // Whether to swap (htonl) mkeys before handing to GPU. Default: true. + bool swap_keys_config = true; + // Extra debug dump controls (env-driven) + pthread_t server_thread_id; class nixlDocaBckndReq : public nixlBackendReqH { diff --git a/src/plugins/gpunetio/gpunetio_backend_aux.h b/src/plugins/gpunetio/gpunetio_backend_aux.h index 8fc897f50b..3392adc3f2 100644 --- a/src/plugins/gpunetio/gpunetio_backend_aux.h +++ b/src/plugins/gpunetio/gpunetio_backend_aux.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -87,6 +87,7 @@ struct docaXferReqGpu { uint32_t rkey[DOCA_XFER_REQ_SIZE]; uint16_t num; uint8_t in_use; + uint8_t keys_are_swapped; uint32_t conn_idx; uint32_t has_notif_msg_idx; uint32_t msg_sz; @@ -119,6 +120,8 @@ struct docaXferCompletion { struct docaNotif { doca_gpu_dev_verbs_qp *qp_gpu; uint32_t msg_lkey; + uint8_t keys_are_swapped; + uint8_t _pad_keys[3]; uintptr_t msg_buf; size_t msg_size; uint32_t msg_num; diff --git a/src/plugins/gpunetio/gpunetio_kernels.cu b/src/plugins/gpunetio/gpunetio_kernels.cu index 7e97cc7ae4..60bc4a42d4 100644 --- a/src/plugins/gpunetio/gpunetio_kernels.cu +++ b/src/plugins/gpunetio/gpunetio_kernels.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,6 +24,16 @@ #define ENABLE_DEBUG 0 +// Expose at runtime whether device code was compiled to expect pre-swapped mkeys +__device__ __constant__ int __doca_mkey_swapped = DOCA_GPUNETIO_VERBS_MKEY_SWAPPED; + +extern "C" int +doca_query_mkey_swapped() { + int v = 0; + if (cudaMemcpyFromSymbol(&v, __doca_mkey_swapped, sizeof(v)) != cudaSuccess) return -1; + return v; +} + __device__ inline void nixl_gpunetio_dev_cq_print_cqe_err(struct mlx5_cqe64 *cqe64) { struct mlx5_err_cqe_ex *err_cqe = (struct mlx5_err_cqe_ex *)cqe64; @@ -39,6 +49,15 @@ nixl_gpunetio_dev_cq_print_cqe_err(struct mlx5_cqe64 *cqe64) { err_cqe->s_wqe_opcode_qpn); } +__device__ static __forceinline__ uint32_t +nixl_prepare_mkey(uint32_t key, uint8_t stored_swapped) { +#if DOCA_GPUNETIO_VERBS_MKEY_SWAPPED == 1 + return stored_swapped ? key : doca_gpu_dev_verbs_bswap32(key); +#else + return stored_swapped ? doca_gpu_dev_verbs_bswap32(key) : key; +#endif +} + /** * @brief [Internal] Poll the Completion Queue (CQ) at a specific index respecting NIXL * requirements. Non-blocking polling, just one-time CQE check. @@ -118,15 +137,30 @@ kernel_read(doca_gpu_dev_verbs_qp *qp, struct docaXferReqGpu *xferReqRing, uint3 wqe_idx = base_wqe_idx + idx; wqe_ptr = doca_gpu_dev_verbs_get_wqe_ptr(qp, wqe_idx); + uint32_t rkey = + nixl_prepare_mkey(xferReqRing[pos].rkey[idx], xferReqRing[pos].keys_are_swapped); + uint32_t lkey = + nixl_prepare_mkey(xferReqRing[pos].lkey[idx], xferReqRing[pos].keys_are_swapped); + doca_gpu_dev_verbs_wqe_prepare_read(qp, wqe_ptr, wqe_idx, cflag, (uint64_t)(xferReqRing[pos].rbuf[idx]), - xferReqRing[pos].rkey[idx], + rkey, (uint64_t)(xferReqRing[pos].lbuf[idx]), - xferReqRing[pos].lkey[idx], + lkey, xferReqRing[pos].size[idx]); +#if ENABLE_DEBUG == 1 + if (qp->need_dump == false) { + printf("prepare_read raddr %lx rkey %x laddr %lx lkey %x size %ld\n", + (uint64_t)(xferReqRing[pos].rbuf[idx]), + rkey, + (uint64_t)(xferReqRing[pos].lbuf[idx]), + lkey, + (uint64_t)xferReqRing[pos].size[idx]); + } +#endif } __syncthreads(); @@ -135,12 +169,15 @@ kernel_read(doca_gpu_dev_verbs_qp *qp, struct docaXferReqGpu *xferReqRing, uint3 wqe_idx++; wqe_ptr = doca_gpu_dev_verbs_get_wqe_ptr(qp, wqe_idx); + uint32_t dump_lkey = nixl_prepare_mkey(xferReqRing[pos].lkey[tot_wqe - 1], + xferReqRing[pos].keys_are_swapped); + doca_gpu_dev_verbs_wqe_prepare_dump(qp, wqe_ptr, wqe_idx, DOCA_GPUNETIO_MLX5_WQE_CTRL_CQ_UPDATE, (uint64_t)(xferReqRing[pos].lbuf[tot_wqe - 1]), - xferReqRing[pos].lkey[tot_wqe - 1], + dump_lkey, 1); } doca_gpu_dev_verbs_mark_wqes_ready(qp, base_wqe_idx, wqe_idx); @@ -183,12 +220,16 @@ kernel_write(doca_gpu_dev_verbs_qp *qp, struct docaXferReqGpu *xferReqRing, uint wqe_idx = base_wqe_idx + idx; wqe_ptr = doca_gpu_dev_verbs_get_wqe_ptr(qp, wqe_idx); + uint32_t rkey = + nixl_prepare_mkey(xferReqRing[pos].rkey[idx], xferReqRing[pos].keys_are_swapped); + uint32_t lkey = + nixl_prepare_mkey(xferReqRing[pos].lkey[idx], xferReqRing[pos].keys_are_swapped); #if ENABLE_DEBUG == 1 printf("prepare_write radd %lx rkey %x ladd %lx lkey %x size %ld\n", (uint64_t)(xferReqRing[pos].rbuf[idx]), - xferReqRing[pos].rkey[idx], + rkey, (uint64_t)(xferReqRing[pos].lbuf[idx]), - xferReqRing[pos].lkey[idx], + lkey, (uint64_t)xferReqRing[pos].size[idx]); #endif doca_gpu_dev_verbs_wqe_prepare_write(qp, @@ -198,9 +239,9 @@ kernel_write(doca_gpu_dev_verbs_qp *qp, struct docaXferReqGpu *xferReqRing, uint cflag, 0, (uint64_t)(xferReqRing[pos].rbuf[idx]), - xferReqRing[pos].rkey[idx], + rkey, (uint64_t)(xferReqRing[pos].lbuf[idx]), - xferReqRing[pos].lkey[idx], + lkey, xferReqRing[pos].size[idx]); } __syncthreads(); @@ -273,12 +314,15 @@ kernel_progress(struct docaXferCompletion *completion_list, completion_list[index].xferReqRingGpu->has_notif_msg_idx), (int)completion_list[index].xferReqRingGpu->msg_sz); #endif + uint32_t notif_lkey = nixl_prepare_mkey( + completion_list[index].xferReqRingGpu->lkey_notif, + completion_list[index].xferReqRingGpu->keys_are_swapped); doca_gpu_dev_verbs_send( completion_list[index].xferReqRingGpu->qp_notif, doca_gpu_dev_verbs_addr{ .addr = (uint64_t)(completion_list[index] .xferReqRingGpu->lbuf_notif), - .key = completion_list[index].xferReqRingGpu->lkey_notif}, + .key = notif_lkey}, completion_list[index].xferReqRingGpu->msg_sz, &out_ticket); @@ -344,11 +388,13 @@ kernel_progress(struct docaXferCompletion *completion_list, for (int idx = 0; idx < DOCA_MAX_NOTIF_INFLIGHT; idx++) { struct mlx5_wqe_data_seg *rwqe_ptr = doca_gpu_dev_verbs_get_rwqe_ptr(notif_fill->qp_gpu, idx); + uint32_t fill_lkey = + nixl_prepare_mkey(notif_fill->msg_lkey, notif_fill->keys_are_swapped); doca_gpu_dev_verbs_wqe_prepare_recv( notif_fill->qp_gpu, rwqe_ptr, (uint64_t)(notif_fill->msg_buf + (notif_fill->msg_size * idx)), - notif_fill->msg_lkey, + fill_lkey, notif_fill->msg_size); } @@ -367,10 +413,12 @@ kernel_progress(struct docaXferCompletion *completion_list, if (blockIdx.x == 2) { while (DOCA_GPUNETIO_VOLATILE(*exit_flag) == 0) { if (DOCA_GPUNETIO_VOLATILE(notif_send_gpu->qp_gpu) != nullptr) { + uint32_t send_lkey = + nixl_prepare_mkey(notif_send_gpu->msg_lkey, notif_send_gpu->keys_are_swapped); doca_gpu_dev_verbs_send( notif_send_gpu->qp_gpu, doca_gpu_dev_verbs_addr{.addr = (uint64_t)notif_send_gpu->msg_buf, - .key = notif_send_gpu->msg_lkey}, + .key = send_lkey}, notif_send_gpu->msg_size, &out_ticket); @@ -379,7 +427,7 @@ kernel_progress(struct docaXferCompletion *completion_list, printf("Notif correctly sent %ld addr %lx msg_lkey %x qp %p size %d\n", out_ticket, notif_send_gpu->msg_buf, - notif_send_gpu->msg_lkey, + send_lkey, (void *)notif_send_gpu->qp_gpu, (int)notif_send_gpu->msg_size); #endif @@ -414,7 +462,6 @@ doca_kernel_write(cudaStream_t stream, return DOCA_SUCCESS; } - doca_error_t doca_kernel_read(cudaStream_t stream, doca_gpu_dev_verbs_qp *qp, diff --git a/src/plugins/gpunetio/verbs/verbs.cpp b/src/plugins/gpunetio/verbs/verbs.cpp index 11add6a3c2..829309a729 100644 --- a/src/plugins/gpunetio/verbs/verbs.cpp +++ b/src/plugins/gpunetio/verbs/verbs.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -463,8 +463,14 @@ mr::mr(doca_gpu *gpu_dev_, void *addr_, uint32_t elem_num_, size_t elem_size_, s if (ibmr == nullptr) throw std::invalid_argument("Failed to create mr"); } - lkey = htobe32(ibmr->lkey); - rkey = htobe32(ibmr->rkey); + lkey = ibmr->lkey; + rkey = ibmr->rkey; + + NIXL_DEBUG << "[dbg] MR path: " << (dmabuf_fd >= 0 ? "dmabuf" : "peermem/ibv_reg_mr") + << ", addr 0x" << std::hex << std::uppercase << (uintptr_t)addr << std::dec + << " len 0x" << std::hex << (uint64_t)tot_size << std::dec << " lkey 0x" << std::hex + << (uint32_t)lkey << std::dec << " rkey 0x" << std::hex << (uint32_t)rkey + << std::dec; } mr::mr(void *addr_, size_t tot_size_, uint32_t rkey_) diff --git a/subprojects/.wraplock b/subprojects/.wraplock new file mode 100644 index 0000000000..a4d636226b --- /dev/null +++ b/subprojects/.wraplock @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + diff --git a/test/gtest/device_api/utils.cu b/test/gtest/device_api/utils.cu index 208d2fbdb5..46fc93d9ea 100644 --- a/test/gtest/device_api/utils.cu +++ b/test/gtest/device_api/utils.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/test/gtest/telemetry_test.cpp b/test/gtest/telemetry_test.cpp index 63c5f0c020..1b95bd2081 100644 --- a/test/gtest/telemetry_test.cpp +++ b/test/gtest/telemetry_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/gtest/test_transfer.cpp b/test/gtest/test_transfer.cpp index c6f5d1a929..0493090051 100644 --- a/test/gtest/test_transfer.cpp +++ b/test/gtest/test_transfer.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/nixl/nixl_test.cpp b/test/nixl/nixl_test.cpp index ce6d81cc16..055496d889 100644 --- a/test/nixl/nixl_test.cpp +++ b/test/nixl/nixl_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/unit/plugins/hf3fs/nixl_hf3fs_mt_test.cpp b/test/unit/plugins/hf3fs/nixl_hf3fs_mt_test.cpp index 191c461b4d..4aa0d64cbc 100644 --- a/test/unit/plugins/hf3fs/nixl_hf3fs_mt_test.cpp +++ b/test/unit/plugins/hf3fs/nixl_hf3fs_mt_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/unit/plugins/hf3fs/nixl_hf3fs_test.cpp b/test/unit/plugins/hf3fs/nixl_hf3fs_test.cpp index 54a2b36b3d..c928516856 100644 --- a/test/unit/plugins/hf3fs/nixl_hf3fs_test.cpp +++ b/test/unit/plugins/hf3fs/nixl_hf3fs_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License");