We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1c5b276 · commit e251425 (copy full SHA for e251425)
vllm_omni/worker/gpu_model_runner.py
@@ -739,9 +739,6 @@ def _preprocess(
739
intermediate_tensors: IntermediateTensors | None = None,
740
):
741
"""Align with v0.12 preprocess and omni's additional information handling."""
742
- # Decode payload first, ensure request state has prompt_embeds / additional_information
743
- self._decode_and_store_request_payloads(scheduler_output)
744
-
745
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
746
is_first_rank = get_pp_group().is_first_rank
747
is_encoder_decoder = self.model_config.is_encoder_decoder
0 commit comments