
Commit 3087578

Add pass to remove unused parameters in to_executorch
Summary: Currently, ExecuTorch serializes every parameter in the exported program, regardless of whether it is actually used. Exporting with strict=True removes unused parameters, but strict=False does not, and export recently switched to non-strict as the default behavior. This causes PTE bloat when doing pt2e quantization (unquantized weights are left in the graph) and sometimes when exporting multiple methods (an encoder and a decoder, for example).

This PR adds a new pass (`remove_unused_parameters_pass`) to strip unused parameters from the `ExportedProgram`. It runs as part of `to_executorch`. A parameter is considered unused if its placeholder node has no uses; it is removed by stripping it from the state_dict, the input specs, and the graph.

As a question for reviewers: should we run this pass earlier, as part of `to_edge`? My rationale for running it in `to_executorch` was that it could theoretically clean up anything else left behind by partitioning and lowering, but I'm not aware of any concrete use cases for this.

Differential Revision: D73654202
1 parent 7e034ca commit 3087578

File tree

6 files changed: +203 −0 lines
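Before the diffs, a minimal sketch (not part of the commit; the Toy model and its attribute names are invented for illustration) of what "unused" means here. After a non-strict export, every parameter still shows up as a placeholder node in the graph, and an unused one simply has no users, which is exactly the condition the new pass checks:

import torch

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.used = torch.nn.Linear(4, 4)
        self.unused = torch.nn.Linear(8, 8)  # never referenced in forward

    def forward(self, x):
        return self.used(x)

ep = torch.export.export(Toy(), (torch.randn(1, 4),), strict=False)

# Parameters appear as placeholder nodes; unused ones have zero users.
for node in ep.graph_module.graph.nodes:
    if node.op == "placeholder":
        print(node.target, "users:", len(node.users))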

exir/passes/TARGETS (+12)
@@ -21,6 +21,7 @@ python_library(
         ":quant_fusion_pass",
         ":quantize_io_pass",
         ":remove_noop_pass",
+        ":remove_unused_parameters_pass",
         ":replace_aten_with_edge_pass",
         ":replace_broken_ops_with_function_ops_pass",
         ":replace_edge_with_backend_pass",
@@ -386,3 +387,14 @@ python_library(
         "//executorch/exir/dialects:lib",
     ],
 )
+
+python_library(
+    name = "remove_unused_parameters_pass",
+    srcs = [
+        "remove_unused_parameters_pass.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/exir/dialects:lib",
+    ],
+)

exir/passes/__init__.py (+4)
@@ -45,6 +45,9 @@
 from executorch.exir.passes.prune_empty_tensors_pass import PruneEmptyTensorsPass
 from executorch.exir.passes.quant_fusion_pass import QuantFusionPass
 from executorch.exir.passes.remove_noop_pass import RemoveNoopPass, RemoveToCopyPass
+from executorch.exir.passes.remove_unused_parameters_pass import (
+    remove_unused_parameters_pass,
+)
 from executorch.exir.passes.replace_aten_with_edge_pass import OpReplacePass
 from executorch.exir.passes.replace_broken_ops_with_function_ops_pass import (
     ReplaceBrokenOpsWithFunctionalOpsPass,
@@ -71,6 +74,7 @@
     "MemoryPlanningPass",
     "HintBasedSymShapeEvalPass",
     "insert_write_back_for_buffers_pass",
+    "remove_unused_parameters_pass",
     "weights_to_outputs_pass",
 ]

exir/passes/remove_unused_parameters_pass.py (new file, +54)
@@ -0,0 +1,54 @@

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import copy

import torch

from torch.export.exported_program import ExportedProgram, InputKind


def remove_unused_parameters_pass(
    ep: ExportedProgram,
) -> ExportedProgram:
    """
    Remove unused parameters from the exported program.
    """

    placeholder_nodes = {
        node.target: node
        for node in ep.graph_module.graph.nodes
        if node.op == "placeholder"
    }

    unused_parameters = [
        s
        for s in ep.graph_signature.input_specs
        if s.kind == InputKind.PARAMETER
        and not _is_parameter_used(ep, s.arg.name, placeholder_nodes)
    ]

    # Remove params from the state dict, graph, and signature.
    new_signature = copy.deepcopy(ep.graph_signature)
    for param in unused_parameters:
        new_signature.input_specs.remove(param)
        del ep._state_dict[param.target]
        ep.graph_module.graph.erase_node(placeholder_nodes[param.arg.name])

    ep._graph_signature = new_signature
    ep.graph_module.recompile()
    return ep


def _is_parameter_used(
    ep: ExportedProgram, parameter: str, placeholder_nodes: dict[str, torch.fx.Node]
) -> bool:
    placeholder_node = placeholder_nodes.get(parameter)
    if placeholder_node is None:
        # Shouldn't happen, but in this case, leave the parameter to be safe.
        return True

    return len(placeholder_node.users) > 0
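Note that the pass mutates the ExportedProgram (state dict, signature, and graph) in place and also returns it. Although it runs automatically inside to_executorch, the export added to exir/passes/__init__.py above means it can also be invoked directly; a minimal sketch, assuming an ExportedProgram ep already in hand:

from executorch.exir.passes import remove_unused_parameters_pass

ep = remove_unused_parameters_pass(ep)  # ep is modified in place and returned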

exir/program/_program.py (+2)
@@ -41,6 +41,7 @@
     EdgeToBackendOpsPass,
     MemoryFormatOpsPass,
     OpReplacePass,
+    remove_unused_parameters_pass,
 )
 from executorch.exir.passes.external_constants_pass import (
     external_constants_pass,
@@ -1529,6 +1530,7 @@ def to_executorch(
         for name, program in self._edge_programs.items():
             program = weights_to_outputs_pass(program)
             program = unsafe_remove_auto_functionalized_pass(program)
+            program = remove_unused_parameters_pass(program)
             gm, new_signature = insert_write_back_for_buffers_pass(program)
             new_gm = program.graph_module
             for p in edge_to_executorch_passes(config, name):
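For callers, nothing about the API changes; a hedged sketch of the flow (model and example_inputs are placeholder names, and the calls are the same ones the tests below exercise) showing where the new pass now runs:

import torch
from executorch.exir import to_edge_transform_and_lower

ep = torch.export.export(model, example_inputs, strict=False)
# remove_unused_parameters_pass now runs automatically inside to_executorch().
pte = to_edge_transform_and_lower(ep).to_executorch()
with open("model.pte", "wb") as f:
    f.write(pte.buffer)  # serialized program, minus any unused parameters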

exir/tests/TARGETS (+16)
@@ -432,6 +432,22 @@ python_unittest(
     ],
 )
 
+python_unittest(
+    name = "test_remove_unused_parameters_pass",
+    srcs = [
+        "test_remove_unused_parameters_pass.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/backends/xnnpack:xnnpack_delegate",
+        "//executorch/exir:lib",
+        "//executorch/exir:memory",
+        "//executorch/exir/capture:config",
+        "//executorch/exir/passes:lib",
+        "//executorch/runtime:runtime",
+    ],
+)
+
 python_unittest(
     name = "test_remove_view_copy",
     srcs = [
exir/tests/test_remove_unused_parameters_pass.py (new file, +115)
@@ -0,0 +1,115 @@

import unittest
from typing import Sequence

import torch

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower
from executorch.exir.passes import remove_unused_parameters_pass
from executorch.runtime import Runtime
from torch.export import ExportedProgram


class TestRemoveUnusedParametersPass(unittest.TestCase):
    class ModelWithUnusedParameters(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear1 = torch.nn.Linear(16, 16)
            self.unused_linear = torch.nn.Linear(1024, 1024)

        def forward(self, x):
            return self.linear1(x)

    def _test_pass(
        self,
        ep: ExportedProgram,
        unused_param_names_and_args: dict[str, str],
        example_inputs: Sequence[torch.Tensor],
        expected_outputs: torch.Tensor,
    ):
        # Verify EP state before running the pass.
        placeholders = set(
            n.target for n in ep.graph_module.graph.nodes if n.op == "placeholder"
        )
        for param_name, param_arg in unused_param_names_and_args.items():
            self.assertIn(param_name, ep.state_dict.keys())
            self.assertIn(param_name, ep.graph_signature.parameters)
            self.assertIn(param_arg, placeholders)

        new_ep = remove_unused_parameters_pass(ep)

        # Verify that the unused params are not in the state dict,
        # graph signature, or graph.
        new_placeholders = set(
            n.target for n in new_ep.graph_module.graph.nodes if n.op == "placeholder"
        )
        for param_name, param_arg in unused_param_names_and_args.items():
            self.assertNotIn(param_name, new_ep.state_dict.keys())
            self.assertNotIn(param_name, new_ep.graph_signature.parameters)
            self.assertNotIn(param_arg, new_placeholders)

        # Verify that the outputs are unchanged.
        new_outputs = new_ep.module()(*example_inputs)
        self.assertTrue(torch.allclose(new_outputs, expected_outputs))

    def test_remove_unused_parameters_simple(self):
        model = self.ModelWithUnusedParameters()
        model.eval()
        example_inputs = (torch.randn(1, 16),)
        eager_outputs = model(*example_inputs)
        ep = torch.export.export(model, example_inputs, strict=False)

        unused_param_names_and_args = {
            "unused_linear.weight": "p_unused_linear_weight",
            "unused_linear.bias": "p_unused_linear_bias",
        }

        self._test_pass(ep, unused_param_names_and_args, example_inputs, eager_outputs)

    def test_remove_unused_parameters_simple_edge_dialect(self):
        model = self.ModelWithUnusedParameters()
        model.eval()
        example_inputs = (torch.randn(1, 16),)
        eager_outputs = model(*example_inputs)

        unused_param_names_and_args = {
            "unused_linear.weight": "p_unused_linear_weight",
            "unused_linear.bias": "p_unused_linear_bias",
        }

        for delegated in [False, True]:
            lowered = to_edge_transform_and_lower(
                torch.export.export(model, example_inputs, strict=False),
                partitioner=[XnnpackPartitioner()] if delegated else [],
            )

            self._test_pass(
                lowered.exported_program(),
                unused_param_names_and_args,
                example_inputs,
                eager_outputs,
            )

    def test_remove_unused_parameters_serialized_e2e(self):
        model = self.ModelWithUnusedParameters()
        model.eval()
        example_inputs = (torch.randn(1, 16),)
        eager_outputs = model(*example_inputs)

        # Pass is expected to run as part of to_executorch().
        lowered = to_edge_transform_and_lower(
            torch.export.export(model, example_inputs, strict=False),
        ).to_executorch()

        # There are approximately 1M unused fp32 parameters - ~4Mb.
        # Without the unused params, the expected size is ~2.5Kb.
        self.assertLess(len(lowered.buffer), 10000)

        # Make sure we can load and run the serialized .pte.
        runtime = Runtime.get()
        program = runtime.load_program(lowered.buffer)
        method = program.load_method("forward")
        runtime_outputs = method.execute([*example_inputs])

        self.assertEqual(1, len(runtime_outputs))
        self.assertTrue(torch.allclose(runtime_outputs[0], eager_outputs))
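For reference on the size assertion above: the unused torch.nn.Linear(1024, 1024) holds 1024 × 1024 + 1024 = 1,049,600 fp32 parameters, roughly 4.2 MB at 4 bytes each, while the used Linear(16, 16) holds only 16 × 16 + 16 = 272. Dropping the unused layer is what brings the serialized .pte under the 10,000-byte bound.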
