Add qwen3_30b_a3b.yaml config for test_policy_update.py (#682)

daniellepintz · web-flow · commit 76abbcc8d5c7 · 2026-02-09T13:32:25.000-05:00
diff --git a/tests/integration_tests/fixtures/qwen3_30b_a3b.yaml b/tests/integration_tests/fixtures/qwen3_30b_a3b.yaml
@@ -0,0 +1,80 @@
+# pytest -s tests/integration_tests/test_policy_update.py::TestWeightSync::test_sanity_check --config tests/integration_tests/fixtures/qwen3_30b_a3b.yaml
+
+# Trainer tensor parallelism (TP) = 1, generator TP = 4
+
+# Global configuration
+group_size: 8
+batch_size: 2
+max_req_tokens: 256
+max_res_tokens: 256
+model: "Qwen/Qwen3-30B-A3B"
+off_by_n: 1 # Off by one by default
+compile: false # When true, enables torch.compile for the trainer and CUDA graphs for vLLM (generator enforce_eager is set to the negation of this flag)
+
+
+# Generator configuration
+generator:
+  engine_args:
+    model: ${model}
+    tensor_parallel_size: 4
+    pipeline_parallel_size: 1
+    enforce_eager: ${not:${compile}}
+  sampling_params:
+    n: ${group_size}
+    max_tokens: ${max_res_tokens}
+    temperature: 1.0
+    top_p: 1.0
+
+# Trainer configuration
+trainer:
+  model:
+    name: qwen3
+    flavor: 30B-A3B
+    hf_assets_path: hf://${model}
+  optimizer:
+    name: AdamW
+    lr: 1e-5
+    eps: 1e-8
+  lr_scheduler:
+    warmup_steps: 1
+  training:
+    local_batch_size: ${batch_size}
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}}  # seq_len >= max_req_tokens + max_res_tokens
+    max_norm: 1.0
+    steps: 1000000
+    dtype: bfloat16
+    gc_freq: 1
+  compile:
+    enable: ${compile}
+  parallelism:
+    data_parallel_replicate_degree: 1
+    data_parallel_shard_degree: -1
+    tensor_parallel_degree: 1
+    pipeline_parallel_degree: 1
+    context_parallel_degree: 1
+    expert_parallel_degree: 1
+    expert_tensor_parallel_degree: 1
+    disable_loss_parallel: true
+  checkpoint:
+    enable: true
+    initial_load_path: hf://${model}
+    initial_load_in_hf: true
+    last_save_in_hf: true
+    interval: 500
+    async_mode: "disabled"
+  activation_checkpoint:
+    mode: selective
+    selective_ac_option: op
+
+# All resource allocations
+services:
+  generator:
+    procs: ${generator.engine_args.tensor_parallel_size}
+    num_replicas: 1
+    with_gpus: true
+
+actors:
+  trainer:
+    procs: 4
+    num_replicas: 1
+    with_gpus: true
diff --git a/tests/integration_tests/test_policy_update.py b/tests/integration_tests/test_policy_update.py
@@ -210,9 +210,9 @@ async def test_sanity_check(self, _setup_and_teardown):
 
         The check performs the following steps:
         - Initialize trainer and push weights v0 (original huggingface ckpt)
-        - Step the trainer, setting all weights to zero and push weights v1
-        - Load weights v0 and check the generator has all zero weights
-        - Load weights v1 and check the generator has all the weights back
+        - Set all the weights of the model on the trainer to zero and push weights as v1
+        - Load weights v1 and check the generator has all zero weights
+        - Load weights v0 and check the generator has all the original weights back
 
         """