From 548056869cd13f233e19a7fe7b7d19a3035aef16 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 11 Dec 2025 17:14:49 -0800 Subject: [PATCH] Add ucx test Signed-off-by: Daniel Huang --- tests/full_tests/ci_gsm8k_tests.sh | 9 +++++++++ tests/unit_tests/run_accuracy_test.sh | 11 +++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/full_tests/ci_gsm8k_tests.sh b/tests/full_tests/ci_gsm8k_tests.sh index f2f86c8cd..055ff6c44 100644 --- a/tests/full_tests/ci_gsm8k_tests.sh +++ b/tests/full_tests/ci_gsm8k_tests.sh @@ -290,6 +290,15 @@ run_pd_disaggregate_nixl_libfabric_test() { echo "✅ PD disaggregate through NIXL libfabric." } +run_pd_disaggregate_nixl_ucx_test() { + echo "➡️ Testing PD disaggregate through NIXL UCX." + git clone https://github.com/intel-staging/ucx.git -b intel_gaudi_gdr_enabling_0 + bash ucx/setup_nixl_ucx.sh + rm -rf ucx + cd ${VLLM_GAUDI_PREFIX}/tests/unit_tests; DECODER_TP_SIZE=1 NIXL_BUFFER_DEVICE=hpu VLLM_NIXL_BACKEND=UCX bash run_accuracy_test.sh + echo "✅ PD disaggregate through NIXL UCX." +} + # sleep mode run_sleep_mode_test() { echo "Testing basic model with sleep mode / wake up functionality" diff --git a/tests/unit_tests/run_accuracy_test.sh b/tests/unit_tests/run_accuracy_test.sh index 3b877d09b..abda9dfd5 100755 --- a/tests/unit_tests/run_accuracy_test.sh +++ b/tests/unit_tests/run_accuracy_test.sh @@ -29,8 +29,12 @@ export PT_HPU_LAZY_MODE=1 NIXL_BUFFER_DEVICE=${NIXL_BUFFER_DEVICE:-"cpu"} VLLM_NIXL_BACKEND=${VLLM_NIXL_BACKEND:-"UCX"} +UCX_TLS="tcp" if [ "$VLLM_NIXL_BACKEND" == "UCX" ]; then export VLLM_NIXL_DEVICE_TO_DEVICE=false + if [ "$NIXL_BUFFER_DEVICE" == "hpu" ]; then + UCX_TLS="gaudi_gdr,ib,rc,ud" + fi else export VLLM_NIXL_DEVICE_TO_DEVICE=true fi @@ -42,8 +46,7 @@ PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1} DECODER_TP_SIZE=${DECODER_TP_SIZE:-2} # Find the git repository root directory -#GIT_ROOT=$(git rev-parse --show-toplevel) -GIT_ROOT="/home/vllm-nixl/vllm" +GIT_ROOT=$(git rev-parse --show-toplevel) #SMI_BIN=$(which nvidia-smi || which rocm-smi) @@ -116,7 +119,7 @@ run_tests_for_model() { echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT" # Build the command with or without model-specific args - BASE_CMD="RANK=0 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ + BASE_CMD="RANK=0 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ --port $PORT \ --enforce-eager \ --max_num_batched_tokens 8192 \ @@ -149,7 +152,7 @@ run_tests_for_model() { echo "Starting decode instance $i on GPU $GPU_ID, port $PORT" # Build the command with or without model-specific args - BASE_CMD="RANK=1 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ + BASE_CMD="RANK=1 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ --port $PORT \ --enforce-eager \ --max_num_batched_tokens 8192 \