Skip to content

Commit fd5dd1a

Browse files
authored
[Bugfix] fix ep clear buffer perf (#4389)
* fix
* Update fused_moe_backend_base.py
1 parent 670aaa3 commit fd5dd1a

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,12 @@ def apply(
168168
Paddle Cutlass compute Fused MoE.
169169
"""
170170
if layer.ep_size > 1:
171-
if layer.fd_config.model_config.moe_phase.phase == "prefill":
171+
if layer.fd_config.model_config.moe_phase.phase == "prefill" and layer.layer_idx == 0:
172172
if layer.fd_config.scheduler_config.splitwise_role == "mixed":
173173
self.ep_prefill_runner.clean_low_latency_buffer()
174174
return self.apply_ep_prefill(layer, x, gate)
175175
else:
176-
if layer.fd_config.scheduler_config.splitwise_role == "mixed":
176+
if layer.fd_config.scheduler_config.splitwise_role == "mixed" and layer.layer_idx == 0:
177177
self.ep_decoder_runner.clean_low_latency_buffer()
178178
return self.apply_ep_decode(layer, x, gate)
179179
else:

0 commit comments

Comments
 (0)