Skip to content

Commit 156e4c2

Browse files
committed
Add ucx test
Signed-off-by: Daniel Huang <daniel1.huang@intel.com>
1 parent e0b176c commit 156e4c2

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

tests/full_tests/ci_gsm8k_tests.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,15 @@ run_pd_disaggregate_nixl_libfabric_test() {
290290
echo "✅ PD disaggregate through NIXL libfabric."
291291
}
292292

293+
# Run the PD-disaggregation accuracy test over the NIXL UCX backend.
# Clones Intel's staging UCX branch with Gaudi GDR enabling, builds/installs
# it via its setup script, then runs the unit-test accuracy script with
# HPU-resident NIXL buffers.
# Globals:   VLLM_GAUDI_PREFIX (read) - repo prefix containing tests/unit_tests
# Outputs:   progress messages to stdout
# Returns:   0 on success, 1 if clone/setup/cd fails
run_pd_disaggregate_nixl_ucx_test() {
    echo "➡️ Testing PD disaggregate through NIXL UCX."
    # Remove any stale checkout first: git clone refuses a non-empty target dir.
    rm -rf ucx
    git clone https://github.com/intel-staging/ucx.git -b intel_gaudi_gdr_enabling_0 || return 1
    bash ucx/setup_nixl_ucx.sh || return 1
    rm -rf ucx
    # Quote the path: a space in VLLM_GAUDI_PREFIX would split the cd argument.
    cd "${VLLM_GAUDI_PREFIX}/tests/unit_tests" || return 1
    DECODER_TP_SIZE=1 NIXL_BUFFER_DEVICE=hpu VLLM_NIXL_BACKEND=UCX bash run_accuracy_test.sh
    echo "✅ PD disaggregate through NIXL UCX."
}
301+
293302
# sleep mode
294303
run_sleep_mode_test() {
295304
echo "Testing basic model with sleep mode / wake up functionality"

tests/unit_tests/run_accuracy_test.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ export PT_HPU_LAZY_MODE=1
2929
NIXL_BUFFER_DEVICE=${NIXL_BUFFER_DEVICE:-"cpu"}
3030
VLLM_NIXL_BACKEND=${VLLM_NIXL_BACKEND:-"UCX"}
3131

32+
# Select UCX transports: default to plain TCP. When the NIXL buffers live on
# an HPU (Gaudi), enable the gaudi_gdr GDR transport plus the InfiniBand
# transports so device memory is moved directly over the fabric.
UCX_TLS="tcp"
if [[ "$VLLM_NIXL_BACKEND" == "UCX" ]]; then
  # UCX backend: transfers are host-mediated (no device-to-device) by default.
  export VLLM_NIXL_DEVICE_TO_DEVICE=false
  if [[ "$NIXL_BUFFER_DEVICE" == "hpu" ]]; then
    UCX_TLS="gaudi_gdr,ib,rc,ud"
  fi
else
  # Non-UCX backends (e.g. libfabric) handle device-to-device directly.
  export VLLM_NIXL_DEVICE_TO_DEVICE=true
fi
@@ -42,8 +46,7 @@ PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
4246
DECODER_TP_SIZE=${DECODER_TP_SIZE:-2}
4347

4448
# Find the git repository root directory. Fail fast if we are not inside a
# work tree: otherwise GIT_ROOT would be silently empty and every path built
# from it later in the script would resolve to the wrong location.
GIT_ROOT=$(git rev-parse --show-toplevel) || { echo "error: not inside a git repository" >&2; exit 1; }
4750

4851
#SMI_BIN=$(which nvidia-smi || which rocm-smi)
4952

@@ -116,7 +119,7 @@ run_tests_for_model() {
116119
echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"
117120

118121
# Build the command with or without model-specific args
119-
BASE_CMD="RANK=0 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
122+
BASE_CMD="RANK=0 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
120123
--port $PORT \
121124
--enforce-eager \
122125
--max_num_batched_tokens 8192 \
@@ -149,7 +152,7 @@ run_tests_for_model() {
149152
echo "Starting decode instance $i on GPU $GPU_ID, port $PORT"
150153

151154
# Build the command with or without model-specific args
152-
BASE_CMD="RANK=1 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
155+
BASE_CMD="RANK=1 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
153156
--port $PORT \
154157
--enforce-eager \
155158
--max_num_batched_tokens 8192 \

0 commit comments

Comments
 (0)