PaddlePaddle · liyonghua0910 · Sep 2, 2025 · Sep 2, 2025 · Oct 13, 2025 · Oct 14, 2025
diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py
@@ -189,7 +189,9 @@ def apply(
 
 class UnquantizedFusedMoEMethod(MoEMethodBase):
     def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
-
+        # num_experts = extra_weight_attrs.pop("num_experts")
+        # hidden_size = extra_weight_attrs.pop("hidden_size")
+        # moe_intermediate_size = extra_weight_attrs.pop("moe_intermediate_size")
         if current_platform.is_cuda():
             self.up_gate_proj_weight_shape = [
                 layer.num_local_experts,

diff --git a/fastdeploy/model_executor/layers/moe/moe.py b/fastdeploy/model_executor/layers/moe/moe.py
@@ -187,7 +187,12 @@ def __init__(
         else:
             self.gate_correction_bias = None
         self.quant_method.create_weights(
-            self, weight_loader=self.weight_loader, model_format=fd_config.model_config.model_format
+            self,
+            weight_loader=self.weight_loader,
+            model_format=fd_config.model_config.model_format,
+            num_experts=self.num_local_experts if self.ep_size > 1 else self.num_experts,
+            hidden_size=self.hidden_size,
+            moe_intermediate_size=self.moe_intermediate_size,
         )
 
         logger.info(