We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1c5b276 · commit e251425 (copy full SHA for e251425)
vllm_omni/worker/gpu_model_runner.py
@@ -739,9 +739,6 @@ def _preprocess(
739
intermediate_tensors: IntermediateTensors | None = None,
740
):
741
"""Align with v0.12 preprocess and omni's additional information handling."""
742
- # Decode payload first, ensure request state has prompt_embeds / additional_information
743
- self._decode_and_store_request_payloads(scheduler_output)
744
-
745
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
746
is_first_rank = get_pp_group().is_first_rank
747
is_encoder_decoder = self.model_config.is_encoder_decoder
0 commit comments