From c5ae0bc9412d382c5bda8518971c92e38c784e67 Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Tue, 24 Mar 2026 14:20:31 +0800 Subject: [PATCH 1/7] merge matmul and add --- fastdeploy/model_executor/layers/linear.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index 2bee885ff43..39591c9f656 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -82,11 +82,9 @@ def process_loaded_weights(self, layer, weights) -> None: layer.weight.set_value(weights) def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: - linear_out = paddle.matmul(x, layer.weight) if layer.with_bias: - linear_out = paddle.add(linear_out, layer.bias) - return linear_out - + return paddle._C_ops.linear(x, layer.weight, layer.bias) + return paddle.matmul(x, layer.weight) class LinearBase(nn.Layer): """ From 0100aaa5237cb5fa338e84a0a2936025a3f56197 Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Tue, 24 Mar 2026 14:27:14 +0800 Subject: [PATCH 2/7] modify format --- fastdeploy/model_executor/layers/linear.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index 39591c9f656..5a8b549bad3 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -86,6 +86,7 @@ def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: return paddle._C_ops.linear(x, layer.weight, layer.bias) return paddle.matmul(x, layer.weight) + class LinearBase(nn.Layer): """ LinearBase Layer. From 47cbb7b846652bf6380d004436c44c3eb3f5531c Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Tue, 24 Mar 2026 16:56:27 +0800 Subject: [PATCH 3/7] using paddle.nn.functional.linear --- fastdeploy/model_executor/layers/linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index 5a8b549bad3..c13f8d4dec2 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -83,7 +83,7 @@ def process_loaded_weights(self, layer, weights) -> None: def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: if layer.with_bias: - return paddle._C_ops.linear(x, layer.weight, layer.bias) + return paddle.nn.functional.linear(x, layer.weight, layer.bias) return paddle.matmul(x, layer.weight) From a89ec24a024a582b71079be7a49bfa853ff1ec4b Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Thu, 26 Mar 2026 22:07:46 +0800 Subject: [PATCH 4/7] using _C_ops.linear --- fastdeploy/model_executor/layers/linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index c13f8d4dec2..5a8b549bad3 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -83,7 +83,7 @@ def process_loaded_weights(self, layer, weights) -> None: def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: if layer.with_bias: - return paddle.nn.functional.linear(x, layer.weight, layer.bias) + return paddle._C_ops.linear(x, layer.weight, layer.bias) return paddle.matmul(x, layer.weight) From c594ab1b08cc0d656820b24422ee71c64ec0170c Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Fri, 27 Mar 2026 11:48:28 +0800 Subject: [PATCH 5/7] using paddle.nn.functional.linear --- fastdeploy/model_executor/layers/linear.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index 5a8b549bad3..468a608cd40 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -82,9 +82,8 @@ def process_loaded_weights(self, layer, weights) -> None: layer.weight.set_value(weights) def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: - if layer.with_bias: - return paddle._C_ops.linear(x, layer.weight, layer.bias) - return paddle.matmul(x, layer.weight) + out = paddle.nn.functional.linear(x, layer.weight, layer.bias if layer.with_bias else None) + return out class LinearBase(nn.Layer): From d7bca72dcce9e4ffd7b1432ad9fe4470b4cd28a5 Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Sun, 29 Mar 2026 17:10:40 +0800 Subject: [PATCH 6/7] add FLAGS_use_legacy_linear env var in test case --- tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py | 1 + tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py | 1 + tests/e2e/test_EB_Lite_serving.py | 1 + tests/e2e/test_Qwen3VLMoe_serving.py | 1 + tests/e2e/test_ernie_03b_pd_router_v1_rdma_global_cache.py | 1 + 5 files changed, 5 insertions(+) diff --git a/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py b/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py index 4024b57613b..dd662101166 100644 --- a/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py +++ b/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py @@ -35,6 +35,7 @@ is_port_open, ) +os.environ["FLAGS_use_legacy_linear"] = "1" @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): diff --git a/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py b/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py index 9a46b9cd0a2..a797e0432b0 100644 --- a/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py +++ b/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py @@ -35,6 +35,7 @@ is_port_open, ) +os.environ["FLAGS_use_legacy_linear"] = "1" @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): diff --git a/tests/e2e/test_EB_Lite_serving.py b/tests/e2e/test_EB_Lite_serving.py index 9d36fa672a9..a72bd8673fd 100644 --- a/tests/e2e/test_EB_Lite_serving.py +++ b/tests/e2e/test_EB_Lite_serving.py @@ -32,6 +32,7 @@ is_port_open, ) +os.environ["FLAGS_use_legacy_linear"] = "1" @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): diff --git a/tests/e2e/test_Qwen3VLMoe_serving.py b/tests/e2e/test_Qwen3VLMoe_serving.py index 0ce1ff53224..142ea47a5b7 100644 --- a/tests/e2e/test_Qwen3VLMoe_serving.py +++ b/tests/e2e/test_Qwen3VLMoe_serving.py @@ -30,6 +30,7 @@ is_port_open, ) +os.environ["FLAGS_use_legacy_linear"] = "1" @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): diff --git a/tests/e2e/test_ernie_03b_pd_router_v1_rdma_global_cache.py b/tests/e2e/test_ernie_03b_pd_router_v1_rdma_global_cache.py index bf5ec6dd1b6..62f5cd08f25 100644 --- a/tests/e2e/test_ernie_03b_pd_router_v1_rdma_global_cache.py +++ b/tests/e2e/test_ernie_03b_pd_router_v1_rdma_global_cache.py @@ -38,6 +38,7 @@ is_port_open, ) +os.environ["FLAGS_use_legacy_linear"] = "1" # Read ports from environment variables; use default values if not set FD_CONNECTOR_PORT = int(os.getenv("FD_CONNECTOR_PORT", 8433)) FD_ROUTER_PORT = int(os.getenv("FD_ROUTER_PORT", 8533)) From 70c2524fb501f3bca552442958680539cdc23477 Mon Sep 17 00:00:00 2001 From: Bingoo <1575938147@qq.com> Date: Sun, 29 Mar 2026 22:22:50 +0800 Subject: [PATCH 7/7] fix format --- tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py | 1 + tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py | 1 + tests/e2e/test_EB_Lite_serving.py | 1 + tests/e2e/test_Qwen3VLMoe_serving.py | 1 + 4 files changed, 4 insertions(+) diff --git a/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py b/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py index dd662101166..4fc12f07023 100644 --- a/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py +++ b/tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py @@ -37,6 +37,7 @@ os.environ["FLAGS_use_legacy_linear"] = "1" + @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): """ diff --git a/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py b/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py index a797e0432b0..c31dec46832 100644 --- a/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py +++ b/tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py @@ -37,6 +37,7 @@ os.environ["FLAGS_use_legacy_linear"] = "1" + @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): """ diff --git a/tests/e2e/test_EB_Lite_serving.py b/tests/e2e/test_EB_Lite_serving.py index a72bd8673fd..2b77e3cc2be 100644 --- a/tests/e2e/test_EB_Lite_serving.py +++ b/tests/e2e/test_EB_Lite_serving.py @@ -34,6 +34,7 @@ os.environ["FLAGS_use_legacy_linear"] = "1" + @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): """ diff --git a/tests/e2e/test_Qwen3VLMoe_serving.py b/tests/e2e/test_Qwen3VLMoe_serving.py index 142ea47a5b7..e695b039306 100644 --- a/tests/e2e/test_Qwen3VLMoe_serving.py +++ b/tests/e2e/test_Qwen3VLMoe_serving.py @@ -32,6 +32,7 @@ os.environ["FLAGS_use_legacy_linear"] = "1" + @pytest.fixture(scope="session", autouse=True) def setup_and_run_server(): """