@@ -92,7 +92,10 @@ mkdir -p $EC_SHARED_STORAGE_PATH
###############################################################################
# Encoder worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
CUDA_VISIBLE_DEVICES="$GPU_E" \
VLLM_DEBUG_DUMP_PATH=$LOG_PATH \
VLLM_NIXL_EC_SIDE_CHANNEL_PORT=5569 \
vllm serve "$MODEL" \
--gpu-memory-utilization 0.01 \
--port "$ENCODE_PORT" \
--enforce-eager \
@@ -102,11 +105,8 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_producer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
"ec_connector": "NixlECConnector",
"ec_role": "ec_producer"
}' \
>"${ENC_LOG}" 2>&1 &

@@ -116,8 +116,10 @@ PIDS+=($!)
# Prefill worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_P" \
VLLM_DEBUG_DUMP_PATH=$LOG_PATH \
UCX_NET_DEVICES=all \
VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \
VLLM_NIXL_EC_SIDE_CHANNEL_PORT=5579 \
vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "$PREFILL_PORT" \
@@ -126,11 +128,8 @@ vllm serve "$MODEL" \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_consumer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
"ec_connector": "NixlECConnector",
"ec_role": "ec_consumer"
}' \
--kv-transfer-config '{
"kv_connector": "NixlConnector",
@@ -144,6 +143,7 @@ PIDS+=($!)
# Decode worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_D" \
VLLM_DEBUG_DUMP_PATH=$LOG_PATH \
UCX_NET_DEVICES=all \
VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \
vllm serve "$MODEL" \
104 changes: 54 additions & 50 deletions examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
@@ -14,7 +14,7 @@ ENCODE_PORT="${ENCODE_PORT:-19534}"
PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}"
PROXY_PORT="${PROXY_PORT:-10001}"

GPU_E="${GPU_E:-0}"
GPU_E="${GPU_E:-2}"
GPU_PD="${GPU_PD:-1}"

EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
@@ -86,7 +86,10 @@ mkdir -p $EC_SHARED_STORAGE_PATH
###############################################################################
# Encoder worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
CUDA_VISIBLE_DEVICES="$GPU_E" \
VLLM_DEBUG_DUMP_PATH=$LOG_PATH/dump \
VLLM_NIXL_EC_SIDE_CHANNEL_PORT=5569 \
vllm serve "$MODEL" \
--gpu-memory-utilization 0.01 \
--port "$ENCODE_PORT" \
--enforce-eager \
@@ -96,11 +99,8 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_producer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
"ec_connector": "NixlECConnector",
"ec_role": "ec_producer"
}' \
>"${ENC_LOG}" 2>&1 &

@@ -109,19 +109,19 @@ PIDS+=($!)
###############################################################################
# Prefill+Decode worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
VLLM_NIXL_EC_SIDE_CHANNEL_PORT=5579 \
VLLM_DEBUG_DUMP_PATH=$LOG_PATH/dump \
CUDA_VISIBLE_DEVICES="$GPU_PD" \
vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "$PREFILL_DECODE_PORT" \
--enforce-eager \
--enable-request-id-headers \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_consumer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
"ec_connector": "NixlECConnector",
"ec_role": "ec_consumer"
}' \
>"${PD_LOG}" 2>&1 &

@@ -147,40 +147,44 @@ PIDS+=($!)
wait_for_server $PROXY_PORT
echo "All services are up!"

###############################################################################
# Benchmark
###############################################################################
echo "Running benchmark (stream)..."
vllm bench serve \
--model $MODEL \
--backend openai-chat \
--endpoint /v1/chat/completions \
--dataset-name hf \
--dataset-path lmarena-ai/VisionArena-Chat \
--seed 0 \
--num-prompts $NUM_PROMPTS \
--port $PROXY_PORT

PIDS+=($!)

###############################################################################
# Single request with local image
###############################################################################
echo "Running single request with local image (non-stream)..."
curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "'${MODEL}'",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": [
{"type": "image_url", "image_url": {"url": "file://'"${GIT_ROOT}"'/tests/v1/ec_connector/integration/hato.jpg"}},
{"type": "text", "text": "What is in this image?"}
]}
]
}'


# cleanup
echo "cleanup..."
cleanup
# ###############################################################################
# # Benchmark
# ###############################################################################
# echo "Running benchmark (stream)..."
# vllm bench serve \
# --model $MODEL \
# --backend openai-chat \
# --endpoint /v1/chat/completions \
# --dataset-name hf \
# --dataset-path lmarena-ai/VisionArena-Chat \
# --seed 0 \
# --num-prompts $NUM_PROMPTS \
# --save-result \
# --save-detailed \
# --result-dir $LOG_PATH \
# --result-filename ePD_nixl_$(date +"%Y%m%d_%H%M%S").json \
# --port $PROXY_PORT

# PIDS+=($!)

# # ###############################################################################
# # # Single request with local image
# # ###############################################################################
# # echo "Running single request with local image (non-stream)..."
# # curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
# # -H "Content-Type: application/json" \
# # -d '{
# # "model": "'${MODEL}'",
# # "messages": [
# # {"role": "system", "content": "You are a helpful assistant."},
# # {"role": "user", "content": [
# # {"type": "image_url", "image_url": {"url": "file://'"${GIT_ROOT}"'/tests/v1/ec_connector/integration/hato.jpg"}},
# # {"type": "text", "text": "What is in this image?"}
# # ]}
# # ]
# # }'


# # cleanup
# echo "cleanup..."
# cleanup
@@ -0,0 +1,190 @@
#!/bin/bash
set -euo pipefail

declare -a PIDS=()

###############################################################################
# Configuration -- override via env before running
###############################################################################
MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
LOG_PATH="${LOG_PATH:-./logs}"
mkdir -p $LOG_PATH

ENCODE_PORT="${ENCODE_PORT:-19534}"
PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}"
PROXY_PORT="${PROXY_PORT:-10001}"

GPU_E="${GPU_E:-2}"
GPU_PD="${GPU_PD:-1}"

EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-12000}" # wait_for_server timeout

NUM_PROMPTS="${NUM_PROMPTS:-100}" # number of prompts to send in benchmark
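
# Example invocation (illustrative only; model, GPU indices, and prompt count are assumptions, adjust to your setup):
#   GPU_E=0 GPU_PD=1 NUM_PROMPTS=50 MODEL=Qwen/Qwen2.5-VL-3B-Instruct bash <this script>.sh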

###############################################################################
# Helpers
###############################################################################
# Find the git repository root directory
GIT_ROOT=$(git rev-parse --show-toplevel)

START_TIME=$(date +"%Y%m%d_%H%M%S")
ENC_LOG=$LOG_PATH/encoder_${START_TIME}.log
PD_LOG=$LOG_PATH/pd_${START_TIME}.log
PROXY_LOG=$LOG_PATH/proxy_${START_TIME}.log

# Poll the OpenAI-compatible chat endpoint on the given port until it responds,
# or give up after TIMEOUT_SECONDS.
wait_for_server() {
    local port=$1
    timeout "$TIMEOUT_SECONDS" bash -c "
        until curl -s localhost:$port/v1/chat/completions > /dev/null; do
            sleep 1
        done" && return 0 || return 1
}

# Cleanup function
cleanup() {
    echo "Stopping everything…"
    trap - INT TERM USR1 # prevent re-entrancy

    # Kill all tracked PIDs
    for pid in "${PIDS[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            echo "Killing process $pid"
            kill "$pid" 2>/dev/null
        fi
    done

    # Wait a moment for graceful shutdown
    sleep 2

    # Force kill any remaining processes
    for pid in "${PIDS[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            echo "Force killing process $pid"
            kill -9 "$pid" 2>/dev/null
        fi
    done

    # Kill the entire process group as backup
    kill -- -$$ 2>/dev/null

    echo "All processes stopped."
    exit 0
}

# Tear everything down on Ctrl-C (INT), SIGUSR1, or SIGTERM.
trap cleanup INT
trap cleanup USR1
trap cleanup TERM

# clear previous cache
echo "remove previous ec cache folder"
rm -rf $EC_SHARED_STORAGE_PATH

echo "make ec cache folder"
mkdir -p $EC_SHARED_STORAGE_PATH
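# The encoder (ec_producer) writes encoder-cache entries under this path; the
# prefill+decode worker (ec_consumer) reads them back through ECSharedStorageConnector.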

###############################################################################
# Encoder worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
--gpu-memory-utilization 0.01 \
--port "$ENCODE_PORT" \
--enforce-eager \
--enable-request-id-headers \
--no-enable-prefix-caching \
--max-num-batched-tokens 114688 \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_producer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
}' \
>"${ENC_LOG}" 2>&1 &

PIDS+=($!)

###############################################################################
# Prefill+Decode worker
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
--gpu-memory-utilization 0.7 \
--port "$PREFILL_DECODE_PORT" \
--enforce-eager \
--enable-request-id-headers \
--max-num-seqs 128 \
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
--ec-transfer-config '{
"ec_connector": "ECSharedStorageConnector",
"ec_role": "ec_consumer",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
}' \
>"${PD_LOG}" 2>&1 &

PIDS+=($!)

# Wait for workers
wait_for_server $ENCODE_PORT
wait_for_server $PREFILL_DECODE_PORT

###############################################################################
# Proxy
###############################################################################
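# disagg_epd_proxy.py routes encode requests to the standalone encoder worker;
# prefill routing is disabled ("disable") because the single worker on
# $PREFILL_DECODE_PORT serves both prefill and decode.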
python disagg_epd_proxy.py \
--host "0.0.0.0" \
--port "$PROXY_PORT" \
--encode-servers-urls "http://localhost:$ENCODE_PORT" \
--prefill-servers-urls "disable" \
--decode-servers-urls "http://localhost:$PREFILL_DECODE_PORT" \
>"${PROXY_LOG}" 2>&1 &

PIDS+=($!)

wait_for_server $PROXY_PORT
echo "All services are up!"

###############################################################################
# Benchmark
###############################################################################
echo "Running benchmark (stream)..."
vllm bench serve \
--model $MODEL \
--backend openai-chat \
--endpoint /v1/chat/completions \
--dataset-name hf \
--dataset-path lmarena-ai/VisionArena-Chat \
--seed 0 \
--num-prompts $NUM_PROMPTS \
--save-result \
--save-detailed \
--result-dir $LOG_PATH \
--result-filename ePD_nixl_shared_$(date +"%Y%m%d_%H%M%S").json \
--port $PROXY_PORT

PIDS+=($!)

###############################################################################
# Single request with local image
###############################################################################
echo "Running single request with local image (non-stream)..."
curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "'${MODEL}'",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": [
{"type": "image_url", "image_url": {"url": "file://'"${GIT_ROOT}"'/tests/v1/ec_connector/integration/hato.jpg"}},
{"type": "text", "text": "What is in this image?"}
]}
]
}'


# cleanup
echo "cleanup..."
cleanup