Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions tests/full_tests/ci_gsm8k_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,15 @@ run_pd_disaggregate_nixl_libfabric_test() {
echo "✅ PD disaggregate through NIXL libfabric."
}

# Run the PD-disaggregate accuracy test over the NIXL UCX backend.
# Globals:   VLLM_GAUDI_PREFIX (read) - root of the vllm-gaudi checkout.
# Outputs:   progress messages to stdout.
# Returns:   non-zero if the UCX fork cannot be cloned or the test dir is missing.
run_pd_disaggregate_nixl_ucx_test() {
    echo "➡️ Testing PD disaggregate through NIXL UCX."
    # Install the Intel Gaudi-enabled UCX fork needed for the gaudi_gdr transport.
    # Fail fast if the clone does not succeed so we don't run setup on a stale tree.
    git clone https://github.com/intel-staging/ucx.git -b intel_gaudi_gdr_enabling_0 || return 1
    bash ucx/setup_nixl_ucx.sh
    rm -rf ucx
    # Quote the path (prefix may contain spaces) and bail out if cd fails,
    # otherwise the accuracy test would run from the wrong directory.
    cd "${VLLM_GAUDI_PREFIX}/tests/unit_tests" || return 1
    DECODER_TP_SIZE=1 NIXL_BUFFER_DEVICE=hpu VLLM_NIXL_BACKEND=UCX bash run_accuracy_test.sh
    echo "✅ PD disaggregate through NIXL UCX."
}

# sleep mode
run_sleep_mode_test() {
echo "Testing basic model with sleep mode / wake up functionality"
Expand Down
11 changes: 7 additions & 4 deletions tests/unit_tests/run_accuracy_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ export PT_HPU_LAZY_MODE=1
# Default to CPU-side NIXL buffers and the UCX backend unless the caller overrides.
: "${NIXL_BUFFER_DEVICE:=cpu}"
: "${VLLM_NIXL_BACKEND:=UCX}"

# Pick the UCX transport list and device-to-device mode for the chosen backend.
UCX_TLS="tcp"
case "$VLLM_NIXL_BACKEND" in
  UCX)
    export VLLM_NIXL_DEVICE_TO_DEVICE=false
    # HPU-resident buffers require the Gaudi GDR transport stack, not plain TCP.
    if [[ "$NIXL_BUFFER_DEVICE" == "hpu" ]]; then
      UCX_TLS="gaudi_gdr,ib,rc,ud"
    fi
    ;;
  *)
    export VLLM_NIXL_DEVICE_TO_DEVICE=true
    ;;
esac
Expand All @@ -42,8 +46,7 @@ PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
DECODER_TP_SIZE=${DECODER_TP_SIZE:-2}

# Find the git repository root directory.
# (Removed the dead hardcoded /home/vllm-nixl/vllm assignment and its stale
# commented-out duplicate — the rev-parse result below always superseded them.)
GIT_ROOT=$(git rev-parse --show-toplevel)

#SMI_BIN=$(which nvidia-smi || which rocm-smi)

Expand Down Expand Up @@ -116,7 +119,7 @@ run_tests_for_model() {
echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"

# Build the command with or without model-specific args
BASE_CMD="RANK=0 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
BASE_CMD="RANK=0 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
--port $PORT \
--enforce-eager \
--max_num_batched_tokens 8192 \
Expand Down Expand Up @@ -149,7 +152,7 @@ run_tests_for_model() {
echo "Starting decode instance $i on GPU $GPU_ID, port $PORT"

# Build the command with or without model-specific args
BASE_CMD="RANK=1 UCX_TLS=tcp VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
BASE_CMD="RANK=1 UCX_TLS=$UCX_TLS VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
--port $PORT \
--enforce-eager \
--max_num_batched_tokens 8192 \
Expand Down
Loading