Skip to content

Commit 03f3f3c

Browse files
committed
fix
1 parent 9004f25 commit 03f3f3c

4 files changed

Lines changed: 44 additions & 21 deletions

File tree

areal/experimental/openai/proxy/proxy_gateway.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -365,20 +365,22 @@ async def start_session(request: Request):
365365
"reuse" if requested_key else "new",
366366
)
367367

368+
# Reject if a refresh is already in flight for this key.
369+
# Must be checked BEFORE `in routes` since refresh pops the route.
370+
if requested_key and requested_key in _refreshing:
371+
return Response(
372+
status_code=429,
373+
content=json.dumps(
374+
{"detail": "A refresh is already in progress for this key."}
375+
).encode(),
376+
)
377+
368378
# ---- REFRESH PATH ----
369379
# Known key with an active route → end old session, wait for
370380
# the training pipeline to cycle, start a new session.
371381
if requested_key and requested_key in known_keys and requested_key in routes:
372-
# Reject concurrent refresh for the same key.
373-
if requested_key in _refreshing:
374-
return Response(
375-
status_code=429,
376-
content=json.dumps(
377-
{"detail": "A refresh is already in progress for this key."}
378-
).encode(),
379-
)
380-
381382
_refreshing.add(requested_key)
383+
ready_entry: _ReadyWorkerEntry | None = None
382384
try:
383385
old_route = routes.pop(requested_key)
384386
logger.info(
@@ -391,7 +393,7 @@ async def start_session(request: Request):
391393

392394
# Skip stale ready entries within the deadline.
393395
deadline = asyncio.get_running_loop().time() + refresh_timeout
394-
ready_entry: _ReadyWorkerEntry | None = None
396+
ready_entry = None
395397
while True:
396398
remaining = deadline - asyncio.get_running_loop().time()
397399
if remaining <= 0:
@@ -465,6 +467,9 @@ async def start_session(request: Request):
465467
except Exception:
466468
known_keys.pop(requested_key, None)
467469
_reject_future(old_route.pending_future, "Refresh failed unexpectedly")
470+
# Also settle the new worker's future if we consumed one
471+
if ready_entry is not None:
472+
_reject_future(ready_entry.future, "Refresh failed unexpectedly")
468473
raise
469474
finally:
470475
_refreshing.discard(requested_key)

areal/trainer/rl_trainer.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,17 @@ class _EmptyDataLoader:
6060
"""Minimal dataloader for online mode that yields empty dicts.
6161
6262
Compatible with ``cycle_dataloader()`` and ``len()`` expectations.
63-
Each "epoch" produces a single batch of ``batch_size`` empty dicts,
64-
so the training loop collects the correct number of trajectories
65-
before proceeding to a train step.
63+
``steps_per_epoch`` controls how many steps constitute one epoch,
64+
derived from ``total_train_steps // total_train_epochs`` to ensure
65+
epoch-frequency-gated components (Saver, RecoverHandler) behave correctly.
6666
"""
6767

68-
def __init__(self, batch_size: int = 1):
68+
def __init__(self, batch_size: int = 1, steps_per_epoch: int = 1):
6969
self.batch_size = batch_size
70+
self._steps_per_epoch = steps_per_epoch
7071

7172
def __len__(self) -> int:
72-
return 1 # 1 step per "epoch" for online mode
73+
return self._steps_per_epoch
7374

7475
def __iter__(self):
7576
while True:
@@ -123,9 +124,26 @@ def __init__(
123124
self.train_dataset = train_dataset
124125
self.valid_dataset = valid_dataset
125126
if train_dataset is None:
126-
# Online mode: use empty data generator
127+
# Online mode: require total_train_steps to compute steps_per_epoch.
128+
# Without this, __len__()=1 causes every step to be treated as an
129+
# epoch boundary, making Saver/RecoverHandler fire every step and
130+
# corrupting the LR schedule.
131+
if config.total_train_steps is None:
132+
raise ValueError(
133+
"total_train_steps must be set for online mode "
134+
"(train_dataset is None). Both total_train_epochs and "
135+
"total_train_steps are needed to compute steps_per_epoch."
136+
)
137+
steps_per_epoch = config.total_train_steps // config.total_train_epochs
138+
if steps_per_epoch < 1:
139+
raise ValueError(
140+
f"total_train_steps ({config.total_train_steps}) must be >= "
141+
f"total_train_epochs ({config.total_train_epochs}) so that "
142+
f"steps_per_epoch >= 1."
143+
)
127144
self.train_dataloader = _EmptyDataLoader(
128-
batch_size=config.train_dataset.batch_size
145+
batch_size=config.train_dataset.batch_size,
146+
steps_per_epoch=steps_per_epoch,
129147
)
130148
else:
131149
self.train_dataloader = self._create_dataloader(

examples/online_rl/config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ trial_name: trial0
44
seed: 1
55
enable_offload: false
66
total_train_epochs: 10
7+
total_train_steps: 100
78
tokenizer_path: ${actor.path}
89

910
cluster:

tests/experimental/openai/test_proxy_gateway.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -678,10 +678,9 @@ async def test_concurrent_refresh_same_key_returns_429(self):
678678
headers=_admin_headers(),
679679
json={"task_id": "t", "api_key": "k1"},
680680
)
681-
# The first refresh already popped the route, so the second
682-
# request falls through to round-robin (no mock → 500) or
683-
# hits the sentinel (429) if the route was still present.
684-
assert resp2.status_code in (429, 500)
681+
# The _refreshing guard now rejects concurrent refreshes for the
682+
# same key before checking `routes`, so this reliably returns 429.
683+
assert resp2.status_code == 429
685684

686685
# Clean up.
687686
refresh1.cancel()

0 commit comments

Comments
 (0)