Skip to content

Commit 6093109

Browse files
ZiyiTsang, gemini-code-assist[bot], and rchardx
authored
fix(infra): convert config.rollout via to_structured_cfg in all launchers (#956)
Launcher subprocesses use parse_cli_args, which returns a plain DictConfig without dataclass defaults. Accessing fields such as return_routed_experts on the unconverted config.rollout crashes because the key is absent from the YAML. sglang_server.py already had this fix; apply the same to_structured_cfg(config.rollout, InferenceEngineConfig) conversion to vllm_server.py, local.py, ray.py, and slurm.py.

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Wentai Zhang <zhangwentai.zwt@antgroup.com>
1 parent 7cb786b commit 6093109

5 files changed

Lines changed: 11 additions & 0 deletions

File tree

areal/infra/launcher/local.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from areal.api.alloc_mode import AllocationMode, AllocationType
1313
from areal.api.cli_args import (
1414
ClusterSpecConfig,
15+
InferenceEngineConfig,
1516
RecoverConfig,
1617
SGLangConfig,
1718
parse_cli_args,
@@ -304,6 +305,8 @@ def local_main(config, run_id: int = 0):
304305
config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
305306
random_seed = config.vllm.seed
306307

308+
config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
309+
307310
backend_spec = {
308311
"sglang": {
309312
"module": "areal.infra.launcher.sglang_server",

areal/infra/launcher/ray.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from areal.api.alloc_mode import AllocationMode, AllocationType
1717
from areal.api.cli_args import (
1818
ClusterSpecConfig,
19+
InferenceEngineConfig,
1920
RecoverConfig,
2021
SGLangConfig,
2122
parse_cli_args,
@@ -376,6 +377,7 @@ def ray_main(config, run_id: int = 0):
376377
actor_spec = get_scheduling_spec(config.actor)
377378

378379
if allocation_mode.gen_backend in ("sglang", "vllm"):
380+
config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
379381
rollout_spec = get_scheduling_spec(config.rollout)
380382

381383
if not is_recover_run:

areal/infra/launcher/sglang_server.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from areal.api.alloc_mode import AllocationMode
1313
from areal.api.cli_args import (
1414
ClusterSpecConfig,
15+
InferenceEngineConfig,
1516
NameResolveConfig,
1617
SGLangConfig,
1718
parse_cli_args,
@@ -214,6 +215,7 @@ def launch_sglang_server(argv):
214215
config.cluster.name_resolve = to_structured_cfg(
215216
config.cluster.name_resolve, NameResolveConfig
216217
)
218+
config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
217219
name_resolve.reconfigure(config.cluster.name_resolve)
218220

219221
allocation_mode = config.allocation_mode

areal/infra/launcher/slurm.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from areal.api.alloc_mode import AllocationMode, AllocationType
1111
from areal.api.cli_args import (
1212
ClusterSpecConfig,
13+
InferenceEngineConfig,
1314
RecoverConfig,
1415
SGLangConfig,
1516
parse_cli_args,
@@ -456,6 +457,7 @@ def slurm_main(config, run_id: int = 0):
456457
config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
457458
random_seed = config.vllm.seed
458459

460+
config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
459461
# Get rollout scheduling spec
460462
rollout_spec = get_scheduling_spec(config.rollout)
461463

areal/infra/launcher/vllm_server.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212

1313
from areal.api.cli_args import (
1414
ClusterSpecConfig,
15+
InferenceEngineConfig,
1516
NameResolveConfig,
1617
parse_cli_args,
1718
to_structured_cfg,
@@ -252,6 +253,7 @@ def launch_vllm_server(argv):
252253
config.cluster.name_resolve = to_structured_cfg(
253254
config.cluster.name_resolve, NameResolveConfig
254255
)
256+
config.rollout = to_structured_cfg(config.rollout, InferenceEngineConfig)
255257
name_resolve.reconfigure(config.cluster.name_resolve)
256258

257259
allocation_mode = config.allocation_mode

0 commit comments

Comments
 (0)