add comments, license, and remove some unnecessary code

ganyi1996ppo · ganyi1996ppo · commit e7d8a0100d10 · 2025-08-15T14:10:28.000+08:00
Signed-off-by: ganyi &lt;pleaplusone.gy@gmail.com&gt;
diff --git a/tests/e2e/singlecard/test_graph_rewriter.py b/tests/e2e/singlecard/test_graph_rewriter.py
@@ -1,3 +1,21 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import torch
 import torch.nn as nn
 import torch_npu
@@ -24,9 +42,6 @@ def forward(self, x):
         hidden_states = self.former_linear(x)
         x, residual = torch_npu.npu_add_rms_norm(hidden_states, x, self.weight, self.eps)
         quantized_output = quant_per_tensor(x, self.quant_scale, self.quant_offset)
-        # output = torch_npu.npu_quant_matmul(
-        #     quantized_output, self.post_linear.weight.transpose(1, 0), self.post_linear.bias,
-        #     self.deq_scale, None, torch.int8)
         return quantized_output, residual
 
 
diff --git a/vllm_ascend/compilation/compiler_interface.py b/vllm_ascend/compilation/compiler_interface.py
@@ -1,3 +1,21 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import copy
 import hashlib
 import os
diff --git a/vllm_ascend/compilation/graph_rewrite_pass_manager.py b/vllm_ascend/compilation/graph_rewrite_pass_manager.py
@@ -1,3 +1,21 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 from torch import fx as fx
 
 from vllm.config import VllmConfig
diff --git a/vllm_ascend/compilation/quant_fusion_pass.py b/vllm_ascend/compilation/quant_fusion_pass.py
@@ -1,3 +1,21 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 import torch
 from torch.fx.subgraph_rewriter import replace_pattern
 import torch_npu
diff --git a/vllm_ascend/models/qwen3.py b/vllm_ascend/models/qwen3.py
diff --git a/vllm_ascend/ops/layernorm.py b/vllm_ascend/ops/layernorm.py
@@ -21,43 +21,6 @@
 from vllm.model_executor.layers.layernorm import RMSNorm
 
 
-class AddRMSNormW8A8Quant(RMSNorm):
-    # Fuse AddRmsNorm and W8A8 quantization ops together
-
-    def __init__(
-        self,
-        hidden_size: int,
-        layer: torch.nn.Module,
-        eps: float = 1e-6,
-        var_hidden_size: Optional[int] = None,
-        has_weight: bool = True,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        super().__init__(hidden_size, eps, var_hidden_size, has_weight, dtype)
-        self.layer = layer
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: Optional[torch.Tensor] = None,
-    ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
-        import torch_npu
-
-        if residual is not None:
-            x, _, residual = torch_npu.npu_add_rms_norm_quant(
-                x,
-                residual,
-                self.weight,
-                self.layer.aclnn_input_scale,
-                self.layer.aclnn_input_offset,
-                epsilon=self.variance_epsilon)
-            return x, residual
-
-        x, residual = torch_npu.npu_rms_norm(x, self.weight,
-                                             self.variance_epsilon)
-        return x
-
-
 class AscendRMSNorm(RMSNorm):
 
     def forward_oot(
diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py
@@ -102,3 +102,24 @@
 #       Validate more models in all kinds of scenario,
 #       if performance is always improved, we can enable this patch by default and remove the env
 #       variable `VLLM_ASCEND_ENABLE_FUSE_MATMUL_ALLREDUCE` in the future.
+# ** File: worker/patch_common/patch_compilation.py **
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#   1. `vllm.compilation.backends.make_compiler`
+#    Why:
+#       We need to use the `GraphRewriterPassManager` at the compiler interface to actually modify the graph. Therefore we 
+#       need to implement our own compiler interface to do our customized operations.
+#    How:
+#       We implement our own compiler interface `AscendAdaptor` to fetch the `GraphRewriterPassManager` and use it to rewrite the
+#       piecewise graph cut by vllm's own backend. This function will just return the `AscendAdaptor` to the `VllmBackend`.
+#    Related PR (if no, explain why):
+#       - We might open a PR to make vllm support custom compiler interfaces, but this is not certain yet.
+#    Future Plan:
+#       We might push the customized compiler interface to the vllm main repo, and leave the backend selection to the platform itself.
+#   2. `vllm.compilation.backends.VllmBackend.configure_post_pass`
+#    Why:
+#       We need to register the `GraphRewriterPassManager` with the `VllmBackend` and enable it during
+#       the compilation. We can't directly adopt vllm's inductor pass because of torch_npu's limited support for
+#       triton and inductor, so we need to patch this function into the `VllmBackend` to use the `GraphRewriterPassManager`.
+#    How:
+#       This function will inject the `GraphRewriterPassManager` into the inductor config, which is a parameter passed to the compiler interface.
+#       In our customized compiler interface `AscendAdaptor`, we will use it to rewrite the fx graph.