diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS index 36bc2260470..8e648c56e16 100644 --- a/backends/arm/TARGETS +++ b/backends/arm/TARGETS @@ -7,6 +7,8 @@ python_library( "ethosu_partitioner.py", "tosa_backend.py", "tosa_partitioner.py", + "vgf_backend.py", + "vgf_partitioner.py", ], deps = [ ":arm_backend", diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py index 05b101bef7d..bdbbbfd1162 100644 --- a/backends/arm/arm_backend.py +++ b/backends/arm/arm_backend.py @@ -25,13 +25,29 @@ def __init__(self): self.output_format = None self.path_for_intermediates = None self.tosa_spec = None - self.input_order = None + + def vgf_compile_spec( + self, + compiler_flags: Optional[str] = "", + ) -> "ArmCompileSpecBuilder": + """ + Generate compile spec for VGF compatible targets + + Args: + compiler_flags: Extra compiler flags for converter_backend + """ + self.output_format = "vgf" + self.compiler_flags = [ + compiler_flags, + ] + self.tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+MI") + return self def ethosu_compile_spec( self, - config: str, - system_config: str, - memory_mode: str, + target: str, + system_config: Optional[str] = None, + memory_mode: Optional[str] = None, extra_flags: Optional[str] = None, config_ini: Optional[str] = "Arm/vela.ini", ) -> "ArmCompileSpecBuilder": @@ -39,7 +55,7 @@ def ethosu_compile_spec( Generate compile spec for Ethos-U NPU Args: - config: Ethos-U accelerator configuration, e.g. ethos-u55-128 + target: Ethos-U accelerator configuration, e.g. 
ethos-u55-128 system_config: System configuration to select from the Vel configuration file memory_mode: Memory mode to select from the Vela configuration file @@ -52,9 +68,24 @@ def ethosu_compile_spec( ), f"Output format already set to f{self.output_format}" self.output_format = "vela" self.compiler_flags = [ - f"--accelerator-config={config}", + f"--accelerator-config={target}", f"--config={config_ini}", ] + + # default system config and memory mode + if "ethos-u55" in target: + if system_config is None: + system_config = "Ethos_U55_High_End_Embedded" + if memory_mode is None: + memory_mode = "Shared_Sram" + elif "ethos-u85" in target: + if system_config is None: + system_config = "Ethos_U85_SYS_DRAM_Mid" + if memory_mode is None: + memory_mode = "Sram_Only" + else: + raise RuntimeError(f"Unknown ethos target: {target}") + if system_config is not None: self.compiler_flags.append(f"--system-config={system_config}") if memory_mode is not None: @@ -62,8 +93,13 @@ def ethosu_compile_spec( if extra_flags is not None: self.compiler_flags.append(extra_flags) + # We require raw output and regor, so always append these flags. This + # overrides any other output setting. 
+ self.compiler_flags.append("--output-format=raw") + self.compiler_flags.append("--debug-force-regor") + base_tosa_version = "TOSA-0.80+BI" - if "u55" in config: + if "u55" in target: # Add the Ethos-U55 extension marker base_tosa_version += "+u55" self.tosa_spec = TosaSpecification.create_from_string(base_tosa_version) @@ -106,26 +142,22 @@ def build(self) -> List[CompileSpec]: # Always supply a TOSA version self.compile_spec = [CompileSpec("tosa_spec", str(self.tosa_spec).encode())] - if self.output_format == "vela": - self.compile_spec += [ - CompileSpec("output_format", "vela".encode()), - CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()), - ] - elif self.output_format == "tosa": - self.compile_spec.append(CompileSpec("output_format", "tosa".encode())) + # Add compile flags, these are backend specific, refer to the backend + # documentation. + self.compile_spec += [ + CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()), + ] + + # encode output format + self.compile_spec.append( + CompileSpec("output_format", self.output_format.encode()) + ) if self.path_for_intermediates is not None: self.compile_spec.append( CompileSpec("debug_artifact_path", self.path_for_intermediates.encode()) ) - if self.input_order: - self.compile_spec.append( - CompileSpec( - "input_order", " ".join(map(str, self.input_order)).encode() - ) - ) - return self.compile_spec @@ -148,6 +180,13 @@ def is_ethosu(compile_spec: List[CompileSpec]) -> bool: return False +def is_vgf(compile_spec: List[CompileSpec]) -> bool: + for spec in compile_spec: + if spec.key == "output_format": + return spec.value.decode() == "vgf" + return False + + def get_tosa_spec(compile_spec: List[CompileSpec]) -> TosaSpecification: for spec in compile_spec: if spec.key == "tosa_spec": diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py index 2d448afead5..c931d49547f 100644 --- a/backends/arm/arm_vela.py +++ b/backends/arm/arm_vela.py @@ -23,12 +23,11 @@ # Pack either 
input or output tensor block, compose the related arrays into # per-io structs to simplify runtime use. -def vela_bin_pack_io(prefix, data, shape_order=None): +def vela_bin_pack_io(prefix, data): vela_input_shapes = data[prefix + "_shape"] - order = shape_order if shape_order else range(len(vela_input_shapes)) ios = struct.pack(" bytes: """ Static helper method to do the compilation of the TOSA flatbuffer representation to a target specific binary stream. """ compile_flags = [] - input_order = [] for spec in compile_spec: if spec.key == "compile_flags": compile_flags.append(spec.value.decode()) - if spec.key == "input_order": - input_order = list(map(int, spec.value.decode().split(","))) if len(compile_flags) == 0: # Not testing for compile_flags correctness here, just that they are @@ -60,7 +57,6 @@ def _compile_tosa_flatbuffer( binary = vela_compile( tosa_flatbuffer, compile_flags, - input_order, verbose=logger.getEffectiveLevel() == logging.INFO, ) return binary diff --git a/backends/arm/quantizer/__init__.py b/backends/arm/quantizer/__init__.py index edf995954d6..5cb5c834a98 100644 --- a/backends/arm/quantizer/__init__.py +++ b/backends/arm/quantizer/__init__.py @@ -9,6 +9,7 @@ EthosUQuantizer, get_symmetric_quantization_config, TOSAQuantizer, + VgfQuantizer, ) # Used in tests diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py index 094819b401e..8b052116ed8 100644 --- a/backends/arm/quantizer/arm_quantizer.py +++ b/backends/arm/quantizer/arm_quantizer.py @@ -27,6 +27,7 @@ from executorch.backends.arm.arm_backend import ( get_tosa_spec, is_ethosu, + is_vgf, ) # usort: skip from executorch.exir.backend.compile_spec_schema import CompileSpec from torch.ao.quantization.fake_quantize import ( @@ -52,6 +53,7 @@ __all__ = [ "TOSAQuantizer", "EthosUQuantizer", + "VgfQuantizer", "get_symmetric_quantization_config", ] @@ -358,3 +360,12 @@ def __init__(self, compile_spec: list[CompileSpec]) -> None: tosa_spec = 
get_tosa_spec(compile_spec) super().__init__(tosa_spec) + + +class VgfQuantizer(TOSAQuantizer): + def __init__(self, compile_spec: list[CompileSpec]) -> None: + if not is_vgf(compile_spec): + raise RuntimeError("compile spec is not targeting VGF") + + tosa_spec = get_tosa_spec(compile_spec) + super().__init__(tosa_spec) diff --git a/backends/arm/tosa_backend.py b/backends/arm/tosa_backend.py index 1367f194204..fdada0b889a 100644 --- a/backends/arm/tosa_backend.py +++ b/backends/arm/tosa_backend.py @@ -35,15 +35,15 @@ logger = logging.getLogger(__name__) -def _get_first_delegation_tag(graph_module) -> str | None: - """Get the first delegation tag from the graph_module or return None.""" +def arm_get_first_delegation_tag(graph_module) -> str: + """Get the first delegation tag from the graph_module or return empty string.""" for node in graph_module.graph.nodes: tag = node.meta.get("delegation_tag") if tag: return tag logger.debug("No delegation tag found in partition.") - return None + return "" @final @@ -63,7 +63,6 @@ def preprocess( # noqa: C901 artifact_path = None output_format = "" compile_flags = [] - input_order = [] for spec in compile_spec: if spec.key == "debug_artifact_path": artifact_path = spec.value.decode() @@ -71,8 +70,6 @@ def preprocess( # noqa: C901 output_format = spec.value.decode() if spec.key == "compile_flags": compile_flags.append(spec.value.decode()) - if spec.key == "input_order": - input_order = list(map(int, spec.value.decode().split(","))) # Check that the output format is set correctly in the compile spec if output_format != "tosa": @@ -129,14 +126,8 @@ def preprocess( # noqa: C901 dbg_fail(node, graph_module, tosa_graph, artifact_path) raise - if len(input_order) > 0: - if input_count != len(input_order): - raise RuntimeError( - "The rank of the input order is not equal to amount of input tensors" - ) - if artifact_path: - tag = _get_first_delegation_tag(graph_module) + tag = arm_get_first_delegation_tag(graph_module) dbg_tosa_dump( 
tosa_graph, artifact_path, diff --git a/backends/arm/vgf_backend.py b/backends/arm/vgf_backend.py new file mode 100644 index 00000000000..39e9f6a9b64 --- /dev/null +++ b/backends/arm/vgf_backend.py @@ -0,0 +1,126 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +# +# Main implementation of AoT flow to partition and preprocess for VGF target +# backends. This flow converts via TOSA, to an encoding of TOSA known as VGF. +# This form is used where the final JIT compile is performed on target (in the +# runtime delegate executorch::runtime::BackendInterface::init). +# + +import logging +import os +import subprocess +import tempfile +from typing import final, List + +from executorch.backends.arm.tosa_backend import ( + arm_get_first_delegation_tag, + TOSABackend, +) +from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult +from executorch.exir.backend.compile_spec_schema import CompileSpec +from torch.export.exported_program import ExportedProgram + +# debug functionality +logger = logging.getLogger(__name__) + + +@final +class VgfBackend(BackendDetails): + """ + BackendDetails subclass for delegation to VGF compatible devices. This enables + encapsulated TOSA on target device and JIT compilation on suitable platforms. + """ + + @staticmethod + def _compile_tosa_flatbuffer( + tosa_flatbuffer: bytes, + compile_spec: List[CompileSpec], + tag_name: str = "", + ) -> bytes: + """ + Static helper method to do the compilation of the TOSA flatbuffer + representation to a target specific binary stream. + """ + compile_flags = [] + artifact_path = None + for spec in compile_spec: + if spec.key == "compile_flags": + compile_flags.append(spec.value.decode()) + if spec.key == "debug_artifact_path": + artifact_path = spec.value.decode() + + # Pass on the TOSA flatbuffer to the vgf compiler. 
+ binary = vgf_compile(tosa_flatbuffer, compile_flags, artifact_path, tag_name) + return binary + + @staticmethod + def preprocess( + edge_program: ExportedProgram, + compile_spec: List[CompileSpec], + ) -> PreprocessResult: + logger.info(f"{VgfBackend.__name__} preprocess") + + # deduce TOSA compile_spec from VGF compile spec. We get a new + # compile spec list, containing only elements relevant for the + # TOSABackend. + tosa_compile_spec = TOSABackend.filter_tosa_compile_specs(compile_spec) + + # Backends don't allow inheritance, as stated in comments in exir/backend/backend_api.py + # ('All backend implementation are final...'), so use composition instead. + # preprocess returns the serialized TOSA flatbuffer in .processed_bytes, + # which can be passed on to next compilation step. + tosa_preprocess = TOSABackend.preprocess(edge_program, tosa_compile_spec) + + tag_name = arm_get_first_delegation_tag(edge_program.graph_module) + + binary = VgfBackend._compile_tosa_flatbuffer( + tosa_preprocess.processed_bytes, compile_spec, tag_name + ) + + return PreprocessResult(processed_bytes=binary) + + +def vgf_compile( + tosa_flatbuffer: bytes, + compile_flags: List[str], + artifact_path: str | None = None, + tag_name: str = "", +): + with tempfile.TemporaryDirectory() as tmpdir: + + # We currently write out a flatbuffer as input to the converter + tosaname = f"output_{tag_name}.tosa" + tosa_path = os.path.join(tmpdir, tosaname) + with open(tosa_path, "wb") as f: + f.write(tosa_flatbuffer) + + additional_flags = " ".join(compile_flags) + vgf_path = tosa_path + ".vgf" + conversion_command = ( + f"converter-backend {additional_flags} -i {tosa_path} -o {vgf_path}" + ) + try: + subprocess.run( + [conversion_command], shell=True, check=True, capture_output=True + ) + except subprocess.CalledProcessError as process_error: + raise RuntimeError( + f"Vgf compiler ('{conversion_command}') failed with error:\n \ + {process_error.stderr.decode()}\n \ + 
Stdout:\n{process_error.stdout.decode()}" + ) + + if artifact_path is not None: + logger.info(f"Emitting debug output to: {vgf_path=}") + os.makedirs(artifact_path, exist_ok=True) + cp = f"cp {vgf_path} {artifact_path}" + subprocess.run(cp, shell=True, check=True, capture_output=False) + + vgf_bytes = open(vgf_path, "rb").read() + return vgf_bytes diff --git a/backends/arm/vgf_partitioner.py b/backends/arm/vgf_partitioner.py new file mode 100644 index 00000000000..0943c01c511 --- /dev/null +++ b/backends/arm/vgf_partitioner.py @@ -0,0 +1,32 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +from typing import final, List, Optional, Sequence + +from executorch.backends.arm.arm_backend import ( + is_vgf, +) # usort: skip +from executorch.backends.arm.tosa_partitioner import TOSAPartitioner +from executorch.backends.arm.vgf_backend import VgfBackend +from executorch.exir.backend.compile_spec_schema import CompileSpec +from executorch.exir.backend.partitioner import DelegationSpec +from torch.fx.passes.operator_support import OperatorSupportBase + + +@final +class VgfPartitioner(TOSAPartitioner): + def __init__( + self, + compile_spec: List[CompileSpec], + additional_checks: Optional[Sequence[OperatorSupportBase]] = None, + ) -> None: + if not is_vgf(compile_spec): + raise RuntimeError("compile spec is not targeting Vgf") + + # Override the delegation spec for Vgf + self.delegation_spec = DelegationSpec(VgfBackend.__name__, compile_spec) + self.additional_checks = additional_checks diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index c5d2a352492..3d6acf2b94a 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -22,12 +22,14 @@ get_tosa_spec, is_ethosu, is_tosa, + is_vgf, ) from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner 
from executorch.backends.arm.quantizer import ( EthosUQuantizer, get_symmetric_quantization_config, TOSAQuantizer, + VgfQuantizer, ) from executorch.backends.arm.tosa_partitioner import TOSAPartitioner from executorch.backends.arm.tosa_specification import TosaSpecification @@ -36,6 +38,8 @@ GenericModelEvaluator, MobileNetV2Evaluator, ) + +from executorch.backends.arm.vgf_partitioner import VgfPartitioner from executorch.devtools.backend_debug import get_delegation_info from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite @@ -145,6 +149,8 @@ def quantize( quantizer = EthosUQuantizer(compile_specs) elif is_tosa(compile_specs): quantizer = TOSAQuantizer(get_tosa_spec(compile_specs)) + elif is_vgf(compile_specs): + quantizer = VgfQuantizer(compile_specs) else: raise RuntimeError("Unsupported compilespecs for quantization!") @@ -267,6 +273,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): "ethos-u85-512", "ethos-u85-1024", "ethos-u85-2048", + "vgf", "TOSA", ] @@ -317,20 +324,15 @@ def get_compile_spec( except: tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") spec_builder = ArmCompileSpecBuilder().tosa_compile_spec(tosa_spec) - elif "ethos-u55" in target: - spec_builder = ArmCompileSpecBuilder().ethosu_compile_spec( - target, - system_config=system_config, - memory_mode=memory_mode, - extra_flags="--debug-force-regor --output-format=raw --verbose-operators --verbose-cycle-estimate", - ) - elif "ethos-u85" in target: + elif "ethos-u" in target: spec_builder = ArmCompileSpecBuilder().ethosu_compile_spec( target, system_config=system_config, memory_mode=memory_mode, - extra_flags="--output-format=raw --verbose-operators --verbose-cycle-estimate", + extra_flags="--verbose-operators --verbose-cycle-estimate", ) + elif "vgf" in target: + spec_builder = ArmCompileSpecBuilder().vgf_compile_spec() if intermediates is not None: spec_builder.dump_intermediate_artifacts_to(intermediates) @@ -521,22 +523,6 @@ def get_args(): 
): raise RuntimeError(f"Model {args.model_name} cannot be delegated.") - if "ethos-u" in args.target and args.system_config is None: - if "u55" in args.target: - args.system_config = "Ethos_U55_High_End_Embedded" - elif "u85" in args.target: - args.system_config = "Ethos_U85_SYS_DRAM_Mid" - else: - raise RuntimeError(f"Invalid target name {args.target}") - - if "ethos-u" in args.target and args.memory_mode is None: - if "u55" in args.target: - args.memory_mode = "Shared_Sram" - elif "u85" in args.target: - args.memory_mode = "Sram_Only" - else: - raise RuntimeError(f"Invalid target name {args.target}") - return args @@ -658,6 +644,8 @@ def to_edge_TOSA_delegate( partitioner = EthosUPartitioner(compile_spec) elif is_tosa(compile_spec): partitioner = TOSAPartitioner(compile_spec) + elif is_vgf(compile_spec): + partitioner = VgfPartitioner(compile_spec) else: raise RuntimeError(f"Unhandled compile spec: {compile_spec}")