Commit 3457e15

Signed-off-by: HonestDeng <[email protected]>
1 parent f70cb6a commit 3457e15

File tree: 6 files changed (+34, −18 lines changed)

vllm_omni/entrypoints/omni_stage.py

Lines changed: 5 additions & 2 deletions
@@ -497,13 +497,15 @@ def _stage_worker(
     num_devices_to_lock = len(devices_to_lock)

     logger.debug(
-        "[Stage-%s] Parallel config: TP=%d, PP=%d, DP=%d, PCP=%d; will lock %d devices: %s",
+        "[Stage-%s] Parallel config: TP=%d, PP=%d, DP=%d, PCP=%d, SP=%d "
+        "(devices_per_stage=%d); will lock %d devices: %s",
         stage_id,
         tensor_parallel_size,
         pipeline_parallel_size,
         data_parallel_size,
         prefill_context_parallel_size,
         sequence_parallel_size,
+        num_devices_per_stage,
         num_devices_to_lock,
         devices_to_lock,
     )
@@ -972,7 +974,8 @@ async def _stage_worker_async(
     # Check if we've been waiting too long
     if _time.time() - wait_start > max_wait_time:
         logger.warning(
-            "[Stage-%s] Timeout waiting for device %s initialization lock, proceeding anyway",
+            "[Stage-%s] Timeout waiting for device %s "
+            "initialization lock, proceeding anyway",
             stage_id,
             device_id,
         )
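The only behavioral change in this file is the extra SP and devices_per_stage fields; the long messages are split using Python's implicit concatenation of adjacent string literals. A minimal standalone sketch of that pattern, with made-up values rather than the real stage configuration:

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("demo")

# Adjacent string literals merge into one %-style format string at compile time,
# so splitting the message across lines changes nothing at runtime; the number of
# placeholders must still match the number of trailing arguments.
logger.debug(
    "[Stage-%s] Parallel config: TP=%d, PP=%d, DP=%d, PCP=%d, SP=%d "
    "(devices_per_stage=%d); will lock %d devices: %s",
    0, 2, 1, 1, 1, 1, 2, 2, [0, 1],
)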

vllm_omni/model_executor/models/mammoth_moda2/mammoth_moda2_ar.py

Lines changed: 3 additions & 2 deletions
@@ -619,8 +619,9 @@ def forward(
         inputs_embeds: torch.Tensor | None = None,
         **kwargs: Any,
     ):
-        # vllm-omni runner passes sampling_metadata and runtime_additional_information in each forward step.
-        # compute_logits is called immediately after forward, so caching here enables step-by-step dynamic token constraints.
+        # vllm-omni runner passes sampling_metadata and runtime_additional_information
+        # in each forward step. compute_logits is called immediately after
+        # forward, so caching here enables step-by-step dynamic token constraints.
         runtime_infos = kwargs.get("runtime_additional_information")
         self._last_runtime_additional_information = runtime_infos if isinstance(runtime_infos, list) else None
         hidden_states = super().forward(
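For context, the reworded comment describes a cache-and-consume pattern: forward() stashes the per-step runtime info, and compute_logits(), called right after, reads it back. A minimal sketch of that pattern with a hypothetical class and placeholder method bodies (not the actual model code):

class ConstrainedDecoderSketch:
    """Illustrative only: cache per-step info from forward for compute_logits."""

    def forward(self, hidden_states, **kwargs):
        runtime_infos = kwargs.get("runtime_additional_information")
        # Keep the info only if it has the expected list form.
        self._last_runtime_additional_information = (
            runtime_infos if isinstance(runtime_infos, list) else None
        )
        return hidden_states  # stand-in for the real transformer forward

    def compute_logits(self, logits):
        infos = self._last_runtime_additional_information
        if infos is not None:
            pass  # e.g. mask tokens that are disallowed at this decoding step
        return logits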

vllm_omni/model_executor/models/mammoth_moda2/mammothmoda2_dit/attention_processor.py

Lines changed: 3 additions & 1 deletion
@@ -458,7 +458,9 @@ def __call__(
         key = key.transpose(1, 2)
         value = value.transpose(1, 2)

-        # explicitly repeat key and value to match query length, otherwise using enable_gqa=True results in MATH backend of sdpa in our test of pytorch2.6
+        # Explicitly repeat key and value to match query length; otherwise using
+        # enable_gqa=True can fall back to the MATH backend of SDPA in our
+        # PyTorch 2.6 tests.
         key = key.repeat_interleave(query.size(-3) // key.size(-3), -3)
         value = value.repeat_interleave(query.size(-3) // value.size(-3), -3)
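A self-contained sketch of the workaround the comment describes: K/V heads are repeated up front so that scaled_dot_product_attention sees matching head counts, instead of relying on enable_gqa=True (the tensor shapes below are made up):

import torch
import torch.nn.functional as F

q = torch.randn(2, 8, 16, 64)   # (batch, query_heads, seq, head_dim)
k = torch.randn(2, 2, 16, 64)   # (batch, kv_heads, seq, head_dim)
v = torch.randn(2, 2, 16, 64)

# Repeat the KV heads along dim -3 so they match the number of query heads.
k = k.repeat_interleave(q.size(-3) // k.size(-3), -3)  # 2 -> 8 heads
v = v.repeat_interleave(q.size(-3) // v.size(-3), -3)
out = F.scaled_dot_product_attention(q, k, v)
print(out.shape)  # torch.Size([2, 8, 16, 64])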

vllm_omni/model_executor/models/mammoth_moda2/mammothmoda2_dit/block_lumina2.py

Lines changed: 2 additions & 1 deletion
@@ -121,7 +121,8 @@ def forward(
         x: torch.Tensor,
         conditioning_embedding: torch.Tensor,
     ) -> torch.Tensor:
-        # convert back to the original dtype in case `conditioning_embedding`` is upcasted to float32 (needed for hunyuanDiT)
+        # Convert back to the original dtype in case `conditioning_embedding`
+        # is upcasted to float32 (needed for hunyuanDiT).
         scale = self.linear_1(self.silu(conditioning_embedding).to(x.dtype))
         x = self.norm(x) * (1 + scale)[:, None, :]
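A small sketch of the dtype handling the comment describes, with arbitrary shapes and bfloat16 standing in for the model's compute dtype: if the conditioning embedding was upcast to float32 elsewhere, it is cast back to x.dtype before the modulation projection so the scale tensor matches the rest of the computation.

import torch
import torch.nn.functional as F

x = torch.randn(2, 16, 64, dtype=torch.bfloat16)                   # activations
conditioning_embedding = torch.randn(2, 64, dtype=torch.float32)   # upcast elsewhere
linear_1 = torch.nn.Linear(64, 64, dtype=torch.bfloat16)

# Cast back to x.dtype so the projection and the later modulation stay in bf16.
scale = linear_1(F.silu(conditioning_embedding).to(x.dtype))
print(scale.dtype)  # torch.bfloat16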

vllm_omni/model_executor/models/mammoth_moda2/mammothmoda2_dit/transport/transport.py

Lines changed: 17 additions & 11 deletions
@@ -90,8 +90,12 @@ def prior_logp(self, z):
         """
         shape = th.tensor(z.size())
         N = th.prod(shape[1:])
-        _fn = lambda x: -N / 2.0 * np.log(2 * np.pi) - th.sum(x**2) / 2.0
-        return th.vmap(_fn)(z)
+
+        # Use a nested def (instead of lambda) to satisfy ruff E731.
+        def _prior_logp_one(x):
+            return -N / 2.0 * np.log(2 * np.pi) - th.sum(x**2) / 2.0
+
+        return th.vmap(_prior_logp_one)(z)

     def check_interval(
         self,
@@ -154,7 +158,7 @@ def sample(self, x1, process_index, num_processes):
                 t[_] = 0.0
                 # print(t)
         else:
-            raise NotImplementedError("Not implemented snr_type %s" % self.snr_type)
+            raise NotImplementedError(f"Not implemented snr_type {self.snr_type}")

         if self.do_shift:
             if self.dynamic_time_shift:
@@ -306,16 +310,18 @@ def get_score(
         """member function for obtaining score of
         x_t = alpha_t * x + sigma_t * eps"""
         if self.model_type == ModelType.NOISE:
-            score_fn = (
-                lambda x, t, model, **kwargs: model(x, t, **kwargs)
-                / -self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
-            )
+
+            def score_fn(x, t, model, **kwargs):
+                sigma = self.path_sampler.compute_sigma_t(path.expand_t_like_x(t, x))[0]
+                return model(x, t, **kwargs) / -sigma
         elif self.model_type == ModelType.SCORE:
-            score_fn = lambda x, t, model, **kwagrs: model(x, t, **kwagrs)
+
+            def score_fn(x, t, model, **kwargs):
+                return model(x, t, **kwargs)
         elif self.model_type == ModelType.VELOCITY:
-            score_fn = lambda x, t, model, **kwargs: self.path_sampler.get_score_from_velocity(
-                model(x, t, **kwargs), x, t
-            )
+
+            def score_fn(x, t, model, **kwargs):
+                return self.path_sampler.get_score_from_velocity(model(x, t, **kwargs), x, t)
         else:
             raise NotImplementedError()
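These refactors are mechanical: named lambdas become nested defs (ruff E731) and %-formatting becomes an f-string, with behavior unchanged. As a standalone illustration of the prior_logp pattern, here is the same vmap-over-one-sample idea applied to a toy batch (shapes chosen arbitrarily):

import numpy as np
import torch as th

z = th.randn(4, 3, 8, 8)                      # batch of 4 samples
N = th.prod(th.tensor(z.size())[1:])          # elements per sample (3 * 8 * 8)

def _prior_logp_one(x):
    # Log-density of a standard normal evaluated at one sample.
    return -N / 2.0 * np.log(2 * np.pi) - th.sum(x**2) / 2.0

# torch.vmap maps the per-sample function over the leading batch dimension.
log_p = th.vmap(_prior_logp_one)(z)
print(log_p.shape)  # torch.Size([4])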

vllm_omni/model_executor/models/mammoth_moda2/tokenization_mammothmoda2_qwen2_5_vl.py

Lines changed: 4 additions & 1 deletion
@@ -44,7 +44,10 @@
     "special_tokens_file": "mammothu_vision_tokens.txt",
 }

-PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
+PAT_STR = (
+    r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?"""
+    r"""[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
+)
 ENDOFTEXT = "<|endoftext|>"
 IMSTART = "<|im_start|>"
 IMEND = "<|im_end|>"
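Since PAT_STR is now built from two adjacent raw-string literals, it is worth noting that implicit concatenation reproduces the original one-line pattern exactly. A quick standalone check (plain string comparison only; the pattern is not compiled here because \p{...} classes are not supported by Python's built-in re module):

SPLIT_PAT_STR = (
    r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?"""
    r"""[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
)
ORIGINAL_PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""

# Adjacent literals are joined at compile time, so the split form is
# byte-for-byte identical to the original single-line pattern.
assert SPLIT_PAT_STR == ORIGINAL_PAT_STR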
