diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
index 57489df64f51..4a22be1da33c 100644
--- a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
+++ b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
@@ -104,6 +104,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
     --ec-transfer-config '{
         "ec_connector": "ECSharedStorageConnector",
         "ec_role": "ec_producer",
+        "ec_buffer_device": "cuda",
         "ec_connector_extra_config": {
             "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
         }
@@ -128,6 +129,7 @@ vllm serve "$MODEL" \
     --ec-transfer-config '{
         "ec_connector": "ECSharedStorageConnector",
         "ec_role": "ec_consumer",
+        "ec_buffer_device": "cuda",
         "ec_connector_extra_config": {
             "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
         }
diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
index 6073e0580b11..344f4a2ffe58 100644
--- a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
+++ b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
@@ -98,6 +98,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
     --ec-transfer-config '{
         "ec_connector": "ECSharedStorageConnector",
         "ec_role": "ec_producer",
+        "ec_buffer_device": "cuda",
         "ec_connector_extra_config": {
             "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
         }
@@ -119,6 +120,7 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
     --ec-transfer-config '{
         "ec_connector": "ECSharedStorageConnector",
         "ec_role": "ec_consumer",
+        "ec_buffer_device": "cuda",
         "ec_connector_extra_config": {
             "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
         }
diff --git a/vllm/distributed/ec_transfer/ec_connector/shared_storage_connector.py b/vllm/distributed/ec_transfer/ec_connector/shared_storage_connector.py
index c8388141dcc9..77211ae3b344 100644
--- a/vllm/distributed/ec_transfer/ec_connector/shared_storage_connector.py
+++ b/vllm/distributed/ec_transfer/ec_connector/shared_storage_connector.py
@@ -51,10 +51,13 @@ def __init__(self, vllm_config: "VllmConfig", role: ECConnectorRole):
         # req_id -> index
         self._mm_datas_need_loads: dict[str, int] = {}
         transfer_config = vllm_config.ec_transfer_config
+        self.device = "cuda"
         if transfer_config is not None:
             self._storage_path = transfer_config.get_from_extra_config(
                 "shared_storage_path", "/tmp"
             )
+            if transfer_config.ec_buffer_device is not None:
+                self.device = transfer_config.ec_buffer_device
             logger.debug(transfer_config)
             logger.debug("Shared storage path is %s", self._storage_path)
         else:
@@ -91,7 +94,7 @@ def start_load_caches(self, encoder_cache, **kwargs) -> None:
             if mm_data.mm_hash in encoder_cache:
                 continue
             filename = self._generate_filename_debug(mm_data.mm_hash)
-            ec_cache = safetensors.torch.load_file(filename)["ec_cache"].cuda()
+            ec_cache = safetensors.torch.load_file(filename)["ec_cache"].to(self.device)
             encoder_cache[mm_data.mm_hash] = ec_cache
             logger.debug("Success load encoder cache for hash %s", mm_data.mm_hash)