From 61c6310e848e9dae873e7efe43d3285ebf5df3ef Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Mon, 13 Oct 2025 15:18:53 +0200 Subject: [PATCH] Arm backend: ArmTester support testing with portable ops and faster simulator This will enable model testing where some ops are not fully delegated, as is done in the backend flow test suite. This also removes the unused fast_fvp pytest flag and always runs the Corstone-300/320 in fast mode, as we are never interested in PMU cycle numbers in the pytest runs anyway; this gives more than a 2x speed boost on some models. This will also extend the FVP timeout to 20 min for backend flows and increase the allocation pool to 80MB to make it possible to test larger models. For normal unit tests the timeout is set to 10 min. Signed-off-by: Zingo Andersen Change-Id: Ic37b2107f8903b6a4b890ce3a7ebfb9abc4261ab --- backends/arm/test/common.py | 4 ++-- backends/arm/test/conftest.py | 5 ----- backends/arm/test/ops/test_permute.py | 6 +----- backends/arm/test/runner_utils.py | 26 +++++++++++++------------- backends/arm/test/setup_testing.sh | 21 +++++++++++++++++++-- backends/arm/test/tester/arm_tester.py | 10 ++++++++-- backends/arm/test/tester/serialize.py | 5 ++++- backends/test/suite/flows/arm.py | 6 +++++- 8 files changed, 52 insertions(+), 31 deletions(-) diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index d8c7ae1a570..1125df6b402 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -87,7 +87,7 @@ def get_u55_compile_spec( macs: int = 128, system_config: str = "Ethos_U55_High_End_Embedded", memory_mode: str = "Shared_Sram", - extra_flags: str = "--debug-force-regor --output-format=raw", + extra_flags: str = "--debug-force-regor --output-format=raw --arena-cache-size=2097152", custom_path: Optional[str] = None, config: Optional[str] = None, tosa_debug_mode: EthosUCompileSpec.DebugMode | None = None, @@ -122,7 +122,7 @@ def get_u85_compile_spec( macs: int = 128, 
system_config="Ethos_U85_SYS_DRAM_Mid", memory_mode="Shared_Sram", - extra_flags="--output-format=raw", + extra_flags="--output-format=raw --arena-cache-size=2097152", custom_path: Optional[str] = None, config: Optional[str] = None, tosa_debug_mode: EthosUCompileSpec.DebugMode | None = None, diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py index 0060bf0ea63..8a08c74efc4 100644 --- a/backends/arm/test/conftest.py +++ b/backends/arm/test/conftest.py @@ -25,10 +25,6 @@ def pytest_configure(config): if getattr(config.option, "llama_inputs", False) and config.option.llama_inputs: pytest._test_options["llama_inputs"] = config.option.llama_inputs # type: ignore[attr-defined] - pytest._test_options["fast_fvp"] = False # type: ignore[attr-defined] - if getattr(config.option, "fast_fvp", False): - pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] - pytest._test_options["tosa_version"] = "1.0" # type: ignore[attr-defined] if config.option.arm_run_tosa_version: pytest._test_options["tosa_version"] = config.option.arm_run_tosa_version @@ -49,7 +45,6 @@ def try_addoption(*args, **kwargs): try_addoption("--arm_quantize_io", action="store_true", help="Deprecated.") try_addoption("--arm_run_corstoneFVP", action="store_true", help="Deprecated.") - try_addoption("--fast_fvp", action="store_true") try_addoption( "--llama_inputs", nargs="+", diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py index aa2f49b5e53..c9fe32bf86c 100644 --- a/backends/arm/test/ops/test_permute.py +++ b/backends/arm/test/ops/test_permute.py @@ -76,11 +76,7 @@ def test_permute_tosa_INT(test_data: torch.Tensor): pipeline.run() -@common.parametrize( - "test_data", - test_data_suite, - xfails={"rank_4_3": "MLETORCH-955 : Permutation numerical diff for u55"}, -) +@common.parametrize("test_data", test_data_suite) @common.XfailIfNoCorstone300 def test_permute_u55_INT(test_data): test_data, dims = test_data() diff 
--git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index bc890d53bc4..ae1fc136ce7 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -28,7 +28,6 @@ ) from executorch.backends.arm.ethosu import EthosUCompileSpec -from executorch.backends.arm.test.conftest import is_option_enabled from executorch.backends.arm.tosa.compile_spec import TosaCompileSpec from executorch.backends.arm.tosa.specification import Tosa_1_00, TosaSpecification from executorch.backends.arm.vgf import VgfCompileSpec @@ -414,10 +413,6 @@ def run_corstone( "The argument passed to the FVP should be less than 256 characters long, otherwise it gets truncated" ) - ethos_u_extra_args = "" - if is_option_enabled("fast_fvp"): - ethos_u_extra_args = ethos_u_extra_args + "--fast" - match target_board: case "corstone-300": command_args = [ @@ -435,12 +430,12 @@ def run_corstone( "-C", "cpu0.semihosting-stack_base=0", "-C", - f"ethosu.extra_args='{ethos_u_extra_args}'", - "-C", "cpu0.semihosting-heap_limit=0", "-C", f"cpu0.semihosting-cwd={intermediate_path}", "-C", + "ethosu.extra_args='--fast'", + "-C", f"cpu0.semihosting-cmd_line='{cmd_line}'", "-a", str(elf_path), @@ -473,7 +468,7 @@ def run_corstone( "-C", f"mps4_board.subsystem.cpu0.semihosting-cwd={intermediate_path}", "-C", - f"mps4_board.subsystem.ethosu.extra_args='{ethos_u_extra_args}'", + "mps4_board.subsystem.ethosu.extra_args='--fast'", "-C", f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'", "-a", @@ -719,20 +714,25 @@ def assert_elf_path_exists(elf_path): ) -def get_elf_path(target_board): +def get_elf_path(target_board: str, use_portable_ops: bool = False): if target_board not in VALID_TARGET: raise ValueError(f"Unsupported target: {target_board}") + if use_portable_ops: + portable_ops_str = "portable-ops_" + else: + portable_ops_str = "" + if target_board in ("corstone-300", "corstone-320"): elf_path = os.path.join( "arm_test", - 
f"arm_semihosting_executor_runner_{target_board}", + f"arm_semihosting_executor_runner_{portable_ops_str}{target_board}", "arm_executor_runner", ) assert_elf_path_exists(elf_path) elif target_board == "vkml_emulation_layer": elf_path = os.path.join( - "arm_test/arm_executor_runner_vkml", + f"arm_test/arm_executor_runner_{portable_ops_str}vkml", "executor_runner", ) assert_elf_path_exists(elf_path) @@ -740,9 +740,9 @@ def get_elf_path(target_board): return elf_path -def arm_executor_runner_exists(target_board): +def arm_executor_runner_exists(target_board: str, use_portable_ops: bool = False): try: - get_elf_path(target_board) + get_elf_path(target_board, use_portable_ops=use_portable_ops) except: return False else: diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index d1e4725d93b..bb68361c238 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -10,6 +10,23 @@ script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")") et_root_dir=$(realpath "${script_dir}/../../..") build_executor_runner=${et_root_dir}/backends/arm/scripts/build_executor_runner.sh build_root_test_dir=${et_root_dir}/arm_test/arm_semihosting_executor_runner +extraflags="-DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=83886080" -${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --output="${build_root_test_dir}_corstone-300" -${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --output="${build_root_test_dir}_corstone-320" +# By default tests with an elf without any portable_ops +# If you supply use_portable_ops=True when creating the ArmTester() +# you will instead test with some portable ops compiled in, see list below. 
+ +#--target --system_config --memory_mode should match the ArmTester used setup see backends/arm/test/common.py + +${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --output="${build_root_test_dir}_corstone-300" --extra_build_flags=${extraflags} +${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid --memory_mode=Dedicated_Sram_384KB --output="${build_root_test_dir}_corstone-320" --extra_build_flags=${extraflags} + +# List of portable ops used by testing, this is mainly used to test models in the flow +# test setup to make sure models that are not fully delegated can still be tested and run OK +# To use this you can set use_portable_ops=True when creating ArmTester() + +portable_ops_list_u55="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::squeeze_copy.dims,dim_order_ops::_clone_dim_order.out" +portable_ops_list_u85="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::full_like.out,aten::bmm.out,aten::scalar_tensor.out,aten::index.Tensor_out,aten::where.self_out" + +${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --select_ops_list="${portable_ops_list_u55}" --output="${build_root_test_dir}_portable-ops_corstone-300" --extra_build_flags=${extraflags} +${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid --memory_mode=Dedicated_Sram_384KB --select_ops_list="${portable_ops_list_u85}" --output="${build_root_test_dir}_portable-ops_corstone-320" --extra_build_flags=${extraflags} diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py index 
604253b6c92..d5484dacbe1 100644 --- a/backends/arm/test/tester/arm_tester.py +++ b/backends/arm/test/tester/arm_tester.py @@ -250,6 +250,8 @@ def __init__( transform_passes: Optional[ Union[Sequence[PassType], Dict[str, Sequence[PassType]]] ] = None, + use_portable_ops: bool = False, + timeout: int = 600, ): """ Args: @@ -271,6 +273,8 @@ def __init__( # Initial model needs to be set as a *possible* but not yet added Stage, therefore add None entry. self.stages[StageType.INITIAL_MODEL] = None self._run_stage(InitialModel(self.original_module)) + self.use_portable_ops = use_portable_ops + self.timeout = timeout def quantize( self, @@ -348,13 +352,15 @@ def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = Non return super().to_executorch(to_executorch_stage) def serialize( - self, serialize_stage: Optional[Serialize] = None, timeout: int = 480 + self, + serialize_stage: Optional[Serialize] = None, ): if serialize_stage is None: serialize_stage = Serialize( compile_spec=self.compile_spec, module=self.original_module, - timeout=timeout, + use_portable_ops=self.use_portable_ops, + timeout=self.timeout, ) assert ( self.compile_spec.get_intermediate_path() is not None diff --git a/backends/arm/test/tester/serialize.py b/backends/arm/test/tester/serialize.py index f0fd246b3a6..33e57cc721d 100644 --- a/backends/arm/test/tester/serialize.py +++ b/backends/arm/test/tester/serialize.py @@ -31,12 +31,14 @@ def __init__( self, compile_spec: ArmCompileSpec, module: Optional[torch.nn.Module], + use_portable_ops: bool = False, timeout: int = 120, ): """ Args: compile_spec: CompileSpecs to be used for serialization. module: Original Module to be used for serialization. Optional - can be used for reference output generation. + use_portable_ops: If True, test with portable ops compiled in; the default is to test without them so that a model that is not fully delegated produces an error. timeout: Timeout for fvp. Default is 120 seconds. 
""" super().__init__() @@ -44,6 +46,7 @@ def __init__( self.timeout = timeout self.executorch_program_manager: ExecutorchProgramManager | None self.compile_spec = compile_spec + self.use_portable_ops = use_portable_ops def run(self, artifact: ExecutorchProgramManager, inputs=None) -> None: super().run(artifact, inputs) @@ -58,7 +61,7 @@ def run_artifact(self, inputs): inputs_flattened, _ = tree_flatten(inputs) intermediate_path = self.compile_spec.get_intermediate_path() target_board = get_target_board(self.compile_spec) - elf_path = get_elf_path(target_board) + elf_path = get_elf_path(target_board, self.use_portable_ops) if not os.path.exists(elf_path): raise FileNotFoundError( diff --git a/backends/test/suite/flows/arm.py b/backends/test/suite/flows/arm.py index 85674331eda..a690e4681f8 100644 --- a/backends/test/suite/flows/arm.py +++ b/backends/test/suite/flows/arm.py @@ -20,11 +20,15 @@ def _create_arm_flow( compile_spec: ArmCompileSpec, symmetric_io_quantization: bool = False, per_channel_quantization: bool = True, + use_portable_ops: bool = True, + timeout: int = 1200, ) -> TestFlow: def _create_arm_tester(*args, **kwargs) -> ArmTester: kwargs["compile_spec"] = compile_spec - return ArmTester(*args, **kwargs) + return ArmTester( + *args, **kwargs, use_portable_ops=use_portable_ops, timeout=timeout + ) support_serialize = not isinstance(compile_spec, TosaCompileSpec) quantize = compile_spec.tosa_spec.support_integer()