From e35d3171e533f633393372ed38d2f6ed047d0fde Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 31 Oct 2025 03:41:38 +0000
Subject: [PATCH 1/4] add cpu support to ray serve

Signed-off-by: root
---
 .../llm/_internal/serve/engines/vllm/vllm_models.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
index 7af47e8b588a..5da9cd759bca 100644
--- a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
+++ b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
@@ -129,9 +129,10 @@ def get_initialization_kwargs(self) -> dict:
             "distributed_executor_backend" in engine_kwargs
             and engine_kwargs["distributed_executor_backend"] != "ray"
         ):
-            raise ValueError(
-                "distributed_executor_backend != 'ray' is not allowed in engine_kwargs when using Ray Serve LLM Configs."
+            logger.warning(
+                "install vllm package for cpu to ensure seamless execution"
             )
+            engine_kwargs["distributed_executor_backend"] = "mp"
         else:
             engine_kwargs["distributed_executor_backend"] = "ray"
 
@@ -265,8 +266,8 @@ def use_gpu(self) -> bool:
 
         # Default behavior based on accelerator_type
         if not self.accelerator_type:
-            # By default, GPU resources are used
-            return True
+            # Use cpu if gpu not provided or none provided
+            return False
 
         return self.accelerator_type in (
             GPUType.NVIDIA_TESLA_V100.value,
@@ -318,3 +319,4 @@ def get_or_create_pg(self) -> PlacementGroup:
         logger.info(f"Using new placement group {pg}. {placement_group_table(pg)}")
 
         return pg
+

From f1fea924af67e3806d1ccbe6dedea70eff319fe9 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 31 Oct 2025 03:57:36 +0000
Subject: [PATCH 2/4] add logger

Signed-off-by: root
---
 python/ray/llm/_internal/serve/engines/vllm/vllm_models.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
index 5da9cd759bca..574b8be909ef 100644
--- a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
+++ b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
@@ -129,10 +129,10 @@ def get_initialization_kwargs(self) -> dict:
             "distributed_executor_backend" in engine_kwargs
             and engine_kwargs["distributed_executor_backend"] != "ray"
         ):
-            logger.warning(
+            if engine_kwargs["distributed_executor_backend"] == "mp":
+                logger.warning(
                 "install vllm package for cpu to ensure seamless execution"
-            )
-            engine_kwargs["distributed_executor_backend"] = "mp"
+                )
         else:
             engine_kwargs["distributed_executor_backend"] = "ray"
 

From 0d86dd7cfbf75968d10b07b5c57b1fe4f8c1addb Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 31 Oct 2025 04:08:43 +0000
Subject: [PATCH 3/4] modified validation logic

Signed-off-by: root
---
 python/ray/llm/_internal/serve/engines/vllm/vllm_models.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
index 574b8be909ef..4acb1b9c9071 100644
--- a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
+++ b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
@@ -129,10 +129,15 @@ def get_initialization_kwargs(self) -> dict:
             "distributed_executor_backend" in engine_kwargs
             and engine_kwargs["distributed_executor_backend"] != "ray"
         ):
-            if engine_kwargs["distributed_executor_backend"] == "mp":
+            if not self.use_gpu:
+                engine_kwargs["distributed_executor_backend"] == "mp"
                 logger.warning(
                 "install vllm package for cpu to ensure seamless execution"
                 )
+            else:
+                raise ValueError(
+                    "distributed_executor_backend != 'ray' is not allowed in engine_kwargs when using Ray Serve LLM Configs."
+                )
         else:
             engine_kwargs["distributed_executor_backend"] = "ray"
 

From 7bd9aeae9530327863dbe494c20bcbfbdf88e40e Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 31 Oct 2025 04:13:30 +0000
Subject: [PATCH 4/4] fix typo

Signed-off-by: root
---
 python/ray/llm/_internal/serve/engines/vllm/vllm_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
index 4acb1b9c9071..ba2d5efdfae5 100644
--- a/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
+++ b/python/ray/llm/_internal/serve/engines/vllm/vllm_models.py
@@ -130,7 +130,7 @@ def get_initialization_kwargs(self) -> dict:
             and engine_kwargs["distributed_executor_backend"] != "ray"
         ):
             if not self.use_gpu:
-                engine_kwargs["distributed_executor_backend"] == "mp"
+                engine_kwargs["distributed_executor_backend"] = "mp"
                 logger.warning(
                 "install vllm package for cpu to ensure seamless execution"
                 )
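
Taken together, the four patches leave the executor-backend validation in get_initialization_kwargs behaving as in the standalone sketch below. resolve_executor_backend is a hypothetical helper written for illustration only (it is not part of the patch); its use_gpu parameter stands in for the config property of the same name.

import logging

logger = logging.getLogger(__name__)


def resolve_executor_backend(engine_kwargs: dict, use_gpu: bool) -> dict:
    """Sketch of the backend validation as it stands after PATCH 4/4."""
    if (
        "distributed_executor_backend" in engine_kwargs
        and engine_kwargs["distributed_executor_backend"] != "ray"
    ):
        if not use_gpu:
            # CPU deployments fall back to vLLM's multiprocessing executor.
            engine_kwargs["distributed_executor_backend"] = "mp"
            logger.warning(
                "install vllm package for cpu to ensure seamless execution"
            )
        else:
            # On GPU, a non-ray backend is still rejected.
            raise ValueError(
                "distributed_executor_backend != 'ray' is not allowed in "
                "engine_kwargs when using Ray Serve LLM Configs."
            )
    else:
        engine_kwargs["distributed_executor_backend"] = "ray"
    return engine_kwargs


# Expected behavior under the assumptions above:
# backend unset -> forced onto the Ray executor.
assert resolve_executor_backend({}, use_gpu=True) == {
    "distributed_executor_backend": "ray"
}
# CPU with a non-ray backend -> allowed, normalized to "mp".
assert resolve_executor_backend(
    {"distributed_executor_backend": "mp"}, use_gpu=False
) == {"distributed_executor_backend": "mp"}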