fix(infra): convert config.rollout via to_structured_cfg in all launchers (#956)

ZiyiTsang · gemini-code-assist[bot] · rchardx · web-flow · commit 60931095481e · 2026-03-02T19:35:57.000+08:00
Launcher subprocesses use parse_cli_args, which returns a plain
DictConfig without dataclass defaults. Accessing fields like
return_routed_experts on the unconverted config.rollout crashes because
the key is absent from the YAML. sglang_server.py already had this fix;
apply the same to_structured_cfg(config.rollout, InferenceEngineConfig)
conversion to vllm_server.py, local.py, ray.py, and slurm.py.

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Wentai Zhang <zhangwentai.zwt@antgroup.com>
diff --git a/areal/infra/launcher/local.py b/areal/infra/launcher/local.py
@@ -12,6 +12,7 @@
 from areal.api.alloc_mode import AllocationMode, AllocationType
 from areal.api.cli_args import (
     ClusterSpecConfig,
+    InferenceEngineConfig,
     RecoverConfig,
     SGLangConfig,
     parse_cli_args,
@@ -304,6 +305,8 @@ def local_main(config, run_id: int = 0):
             config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
             random_seed = config.vllm.seed
 
+        config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
+
         backend_spec = {
             "sglang": {
                 "module": "areal.infra.launcher.sglang_server",
diff --git a/areal/infra/launcher/ray.py b/areal/infra/launcher/ray.py
@@ -16,6 +16,7 @@
 from areal.api.alloc_mode import AllocationMode, AllocationType
 from areal.api.cli_args import (
     ClusterSpecConfig,
+    InferenceEngineConfig,
     RecoverConfig,
     SGLangConfig,
     parse_cli_args,
@@ -376,6 +377,7 @@ def ray_main(config, run_id: int = 0):
     actor_spec = get_scheduling_spec(config.actor)
 
     if allocation_mode.gen_backend in ("sglang", "vllm"):
+        config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
         rollout_spec = get_scheduling_spec(config.rollout)
 
     if not is_recover_run:
diff --git a/areal/infra/launcher/sglang_server.py b/areal/infra/launcher/sglang_server.py
@@ -12,6 +12,7 @@
 from areal.api.alloc_mode import AllocationMode
 from areal.api.cli_args import (
     ClusterSpecConfig,
+    InferenceEngineConfig,
     NameResolveConfig,
     SGLangConfig,
     parse_cli_args,
@@ -214,6 +215,7 @@ def launch_sglang_server(argv):
     config.cluster.name_resolve = to_structured_cfg(
         config.cluster.name_resolve, NameResolveConfig
     )
+    config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
     name_resolve.reconfigure(config.cluster.name_resolve)
 
     allocation_mode = config.allocation_mode
diff --git a/areal/infra/launcher/slurm.py b/areal/infra/launcher/slurm.py
@@ -10,6 +10,7 @@
 from areal.api.alloc_mode import AllocationMode, AllocationType
 from areal.api.cli_args import (
     ClusterSpecConfig,
+    InferenceEngineConfig,
     RecoverConfig,
     SGLangConfig,
     parse_cli_args,
@@ -456,6 +457,7 @@ def slurm_main(config, run_id: int = 0):
             config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
             random_seed = config.vllm.seed
 
+        config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
         # Get rollout scheduling spec
         rollout_spec = get_scheduling_spec(config.rollout)
 
diff --git a/areal/infra/launcher/vllm_server.py b/areal/infra/launcher/vllm_server.py
@@ -12,6 +12,7 @@
 
 from areal.api.cli_args import (
     ClusterSpecConfig,
+    InferenceEngineConfig,
     NameResolveConfig,
     parse_cli_args,
     to_structured_cfg,
@@ -252,6 +253,7 @@ def launch_vllm_server(argv):
     config.cluster.name_resolve = to_structured_cfg(
         config.cluster.name_resolve, NameResolveConfig
     )
+    config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
     name_resolve.reconfigure(config.cluster.name_resolve)
 
     allocation_mode = config.allocation_mode