diff --git a/FEXCore/Source/Common/SoftFloat.h b/FEXCore/Source/Common/SoftFloat.h index dd6b4ae367..c1e8743f3a 100644 --- a/FEXCore/Source/Common/SoftFloat.h +++ b/FEXCore/Source/Common/SoftFloat.h @@ -504,6 +504,47 @@ struct FEX_PACKED X80SoftFloat { return std::bit_cast(Result); } + bool IsSignalingNaN() const { + return (Exponent == 0x7FFF) && (Significand & 0x8000000000000000ULL) && !(Significand & 0x4000000000000000ULL) && // Bit 62 clear (signaling) + (Significand & 0x3FFFFFFFFFFFFFFFULL); + } + + bool IsQuietNaN() const { + return (Exponent == 0x7FFF) && (Significand & 0x8000000000000000ULL) && (Significand & 0x4000000000000000ULL); // Bit 62 set (quiet) + } + + // Helper to detect if this is any NaN + bool IsNaN() const { + return IsSignalingNaN() || IsQuietNaN(); + } + + // X87 value to F64 while preserving signaling nan property + double ToF64_PreserveNaN(softfloat_state* state) const { + if (IsSignalingNaN()) { + // we keep it as a signaling nan in ieee754 in 64bits + uint64_t sign_bit = Sign ? 
0x8000000000000000ULL : 0; + uint64_t exp_bits = 0x7FF0000000000000ULL; + uint64_t x87_frac = Significand & 0x3FFFFFFFFFFFFFFFULL; + uint64_t ieee_frac = (x87_frac >> 11) & 0x0007FFFFFFFFFFFFULL; + + if (ieee_frac == 0) { + ieee_frac = 1; + } + ieee_frac &= ~0x0008000000000000ULL; + + uint64_t result_bits = sign_bit | exp_bits | ieee_frac; + return std::bit_cast(result_bits); + } else if (IsQuietNaN()) { + const float64_t Result = extF80_to_f64(state, *this); + uint64_t result_bits = std::bit_cast(Result); + result_bits |= 0x0008000000000000ULL; + return std::bit_cast(result_bits); + } else { + const float64_t Result = extF80_to_f64(state, *this); + return std::bit_cast(Result); + } + } + double ToF64(softfloat_state* state) const { const float64_t Result = extF80_to_f64(state, *this); return std::bit_cast(Result); @@ -584,6 +625,39 @@ struct FEX_PACKED X80SoftFloat { *this = f64_to_extF80(state, std::bit_cast(rhs)); } + // Create X80SoftFloat from double while preserving NaN signaling properties + static X80SoftFloat FromF64_PreserveNaN(softfloat_state* state, double value) { + uint64_t bits = std::bit_cast(value); + + // Check if it's a nan + if ((bits & 0x7FF0000000000000ULL) == 0x7FF0000000000000ULL && (bits & 0x000FFFFFFFFFFFFFULL) != 0) { + + X80SoftFloat result; + result.Sign = (bits >> 63) & 1; + result.Exponent = 0x7FFF; + + bool is_signaling = !(bits & 0x0008000000000000ULL); + uint64_t ieee_payload = bits & 0x0007FFFFFFFFFFFFULL; + + // set bit 63 required for x87 + result.Significand = 0x8000000000000000ULL; + + if (is_signaling) { // clear bit 62 for signaling nan + result.Significand &= ~0x4000000000000000ULL; + } else { // set bit 62 for quiet nan + result.Significand |= 0x4000000000000000ULL; + } + + // ieee754 51-bit payload -> x87 62-bit payload + result.Significand |= (ieee_payload << 11) & 0x3FFFFFFFFFFFFFFFULL; + + return result; + } + + // For non-NaN values, use standard conversion + return X80SoftFloat(state, value); + } + 
X80SoftFloat(softfloat_state* state, BIGFLOAT rhs) { #if BIGFLOATSIZE == 16 *this = f128_to_extF80(state, std::bit_cast(rhs)); diff --git a/FEXCore/Source/Interface/Config/Config.cpp b/FEXCore/Source/Interface/Config/Config.cpp index fd1af7e3bf..aa2034d552 100644 --- a/FEXCore/Source/Interface/Config/Config.cpp +++ b/FEXCore/Source/Interface/Config/Config.cpp @@ -413,6 +413,12 @@ void ReloadMetaLayer() { // Single stepping also enforces single instruction size blocks Set(FEXCore::Config::ConfigOption::CONFIG_MAXINST, "1"); } + + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_X87STRICTREDUCEDPRECISION) && + Meta->GetConv(FEXCore::Config::CONFIG_X87STRICTREDUCEDPRECISION).value_or(false)) { + // Strict reduced precision requires reduced precision to be enabled + Set(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION, "1"); + } } void AddLayer(fextl::unique_ptr _Layer) { diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index 710d52b392..6669c0dc8c 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -465,6 +465,14 @@ "Emulates X87 floating point using 64-bit precision. This reduces emulation accuracy and may result in rendering bugs." ] }, + "X87StrictReducedPrecision": { + "Type": "bool", + "Default": "false", + "Desc": [ + "Enables stricter X87 floating point behavior when X87ReducedPrecision is enabled.", + "Adds additional checks and implementations like NaN propagation for better compatibility." 
+ ] + }, "StallProcess": { "Type": "bool", "Default": "false", diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index 169b3e1810..a5725c5bf5 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -217,6 +217,7 @@ class ContextImpl final : public FEXCore::Context::Context, public CPU::CodeBuff FEX_CONFIG_OPT(BlockJITNaming, BLOCKJITNAMING); FEX_CONFIG_OPT(GDBSymbols, GDBSYMBOLS); FEX_CONFIG_OPT(x87ReducedPrecision, X87REDUCEDPRECISION); + FEX_CONFIG_OPT(x87StrictReducedPrecision, X87STRICTREDUCEDPRECISION); FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY); FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS); FEX_CONFIG_OPT(SmallTSCScale, SMALLTSCSCALE); diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index fa929b7b7e..62da3b312e 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -57,20 +57,13 @@ desc: Glues Frontend, OpDispatcher and IR Opts & Compilation, LookupCache, Dispa #include #include #include -#include -#include #include #include #include -#include #include -#include #include -#include #include -#include #include -#include #include #include diff --git a/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h b/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h index 06f03fde55..a0b56a0f04 100644 --- a/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h +++ b/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h @@ -2,11 +2,13 @@ #pragma once #include "Common/SoftFloat.h" +#include "Interface/Context/Context.h" #include "Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h" #include "Interface/IR/IR.h" #include #include +#include namespace FEXCore::CPU { FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW, bool Force80BitPrecision = false) { @@ 
-77,6 +79,12 @@ struct OpHandlers { FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle8(uint16_t FCW, double src, FEXCore::Core::CpuStateFrame* Frame) { FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1); ScopedSoftFloatState State {FCW, Frame}; + auto Context = static_cast(Frame->Thread->CTX); + auto ReducedPrecisionMode = Context->Config.x87ReducedPrecision; + auto StrictReducedPrecisionMode = Context->Config.x87StrictReducedPrecision; + if (!ReducedPrecisionMode || StrictReducedPrecisionMode) { + return X80SoftFloat::FromF64_PreserveNaN(&State.State, src); + } return X80SoftFloat(&State.State, src); } }; @@ -115,6 +123,12 @@ struct OpHandlers { FEXCORE_PRESERVE_ALL_ATTR static double handle8(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) { FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1); ScopedSoftFloatState State {FCW, Frame}; + auto Context = static_cast(Frame->Thread->CTX); + auto ReducedPrecisionMode = Context->Config.x87ReducedPrecision; + auto StrictReducedPrecisionMode = Context->Config.x87StrictReducedPrecision; + if (!ReducedPrecisionMode || StrictReducedPrecisionMode) { + return X80SoftFloat(src).ToF64_PreserveNaN(&State.State); + } return X80SoftFloat(src).ToF64(&State.State); } }; diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 5c2a5cab15..df5cd0493e 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1330,6 +1330,7 @@ class OpDispatchBuilder final : public IREmitter { private: FEX_CONFIG_OPT(ReducedPrecisionMode, X87REDUCEDPRECISION); + FEX_CONFIG_OPT(StrictReducedPrecisionMode, X87STRICTREDUCEDPRECISION); struct JumpTargetInfo { Ref BlockEntry; diff --git a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp index f903b225a0..4e5c10cb8f 100644 
--- a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp @@ -157,7 +157,9 @@ class X87StackOptimization final : public Pass { : Features(Features) , GPROpSize(GPROpSize) { FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION); + FEX_CONFIG_OPT(StrictReducedPrecision, X87STRICTREDUCEDPRECISION); ReducedPrecisionMode = ReducedPrecision; + StrictReducedPrecisionMode = StrictReducedPrecision; } void Run(IREmitter* Emit) override; @@ -165,10 +167,12 @@ class X87StackOptimization final : public Pass { const FEXCore::HostFeatures& Features; const OpSize GPROpSize; bool ReducedPrecisionMode; + bool StrictReducedPrecisionMode; FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS); // Helpers Ref RotateRight8(uint32_t V, Ref Amount); + Ref SilenceNaN(Ref Value, OpSize StoreSize); void F80SplitStore_Helper(const IROp_StoreStackMem* Op, Ref StackNode, Ref AddrNode, Ref Offset, OpSize Align, MemOffsetType OffsetType, uint8_t OffsetScale) { @@ -215,6 +219,7 @@ class X87StackOptimization final : public Pass { case OpSize::i32Bit: case OpSize::i64Bit: { StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode); + StackNode = SilenceNaN(StackNode, Op->StoreSize); IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale); break; } @@ -244,6 +249,9 @@ class X87StackOptimization final : public Pass { [[fallthrough]]; } case OpSize::i64Bit: { + if (StrictReducedPrecisionMode) { + StackNode = SilenceNaN(StackNode, Op->StoreSize); + } IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale); break; } @@ -333,6 +341,8 @@ class X87StackOptimization final : public Pass { // Cache for Constants // ConstantPoll[i] has IREmit->_Constant(i); std::array ConstantPool {}; + Ref CachedQuietBit32 {}; + Ref CachedQuietBit64 {}; Ref GetConstant(ssize_t Offset); // Cached value for Top @@ -367,6 +377,8 @@ inline const 
X87StackOptimization::StackMemberInfo X87StackOptimization::StackMe inline void X87StackOptimization::InvalidateCaches() { InvalidateCachedRegs(); ConstantPool.fill(nullptr); + CachedQuietBit32 = nullptr; + CachedQuietBit64 = nullptr; } inline void X87StackOptimization::InvalidateCachedRegs() { @@ -385,12 +397,23 @@ inline void X87StackOptimization::Reset() { } inline Ref X87StackOptimization::GetConstant(ssize_t Offset) { + if (Offset == 0x00400000) { + if (!CachedQuietBit32) { + CachedQuietBit32 = IREmit->_Constant(Offset); + } + return CachedQuietBit32; + } + if (Offset == 0x0008000000000000LL) { + if (!CachedQuietBit64) { + CachedQuietBit64 = IREmit->_Constant(Offset); + } + return CachedQuietBit64; + } + if (Offset < 0 || Offset >= X87StackOptimization::ConstantPool.size()) { - // not dealt by pool return IREmit->_Constant(Offset); } if (ConstantPool[Offset] == nullptr) { - ConstantPool[Offset] = IREmit->_Constant(Offset); } return ConstantPool[Offset]; @@ -493,6 +516,30 @@ inline Ref X87StackOptimization::RotateRight8(uint32_t V, Ref Amount) { return IREmit->_Lshr(OpSize::i32Bit, GetConstant(V | (V << 8)), Amount); } +inline Ref X87StackOptimization::SilenceNaN(Ref Value, OpSize StoreSize) { + // We expect Value here to reach after conversion - so it's already in the target size (32 or 64 bit float) + // Never 80bit since we do not silence 80bit values, since it's likely a copy in that case. 
+ LOGMAN_THROW_A_FMT(StoreSize == OpSize::i32Bit || StoreSize == OpSize::i64Bit, "Unexpected store size"); + + const auto RegisterSize = OpSize::i64Bit; + const auto ElementSize = StoreSize; + + // Create quiet bit constant in FPR + Ref QuietBitConst; + if (StoreSize == OpSize::i32Bit) { + // 0x00400000 - Bit 22 for 32-bit float + QuietBitConst = IREmit->_VCastFromGPR(RegisterSize, ElementSize, GetConstant(0x00400000U)); + } else { + // 0x0008000000000000 - Bit 51 for 64-bit double + QuietBitConst = IREmit->_VCastFromGPR(RegisterSize, ElementSize, GetConstant(0x0008000000000000ULL)); + } + + // NaN detection: fcmeq(v, v) == 0xFFFFFFFF if NOT NaN, 0x00000000 if NaN + Ref IsNotNaNMask = IREmit->_VFCMPEQ(RegisterSize, ElementSize, Value, Value); + Ref Silenced = IREmit->_VOr(RegisterSize, ElementSize, Value, QuietBitConst); + return IREmit->_VBSL(RegisterSize, IsNotNaNMask, Value, Silenced); +} + inline std::optional X87StackOptimization::MigrateToSlowPath_IfInvalid(uint8_t Offset) { const auto& [Valid, StackMember] = StackData.top(Offset); MigrateToSlowPathIf(Valid != StackSlot::VALID); @@ -1011,6 +1058,9 @@ void X87StackOptimization::Run(IREmitter* Emit) { if (Op->StoreSize == OpSize::f80Bit) { Store80BitToMem(Op, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale); } else { + if (!ReducedPrecisionMode || StrictReducedPrecisionMode) { + SourceValue = SilenceNaN(SourceValue, Op->StoreSize); + } IREmit->_StoreMemFPR(StoreSize, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale); } break; diff --git a/Source/Tools/FEXConfig/main.qml b/Source/Tools/FEXConfig/main.qml index c85417b1eb..257a9f71c6 100644 --- a/Source/Tools/FEXConfig/main.qml +++ b/Source/Tools/FEXConfig/main.qml @@ -706,10 +706,17 @@ ApplicationWindow { } ConfigCheckBox { + id: x87ReducedPrecisionCheckbox text: qsTr("Reduced x87 precision") config: "X87ReducedPrecision" } + ConfigCheckBox { + text: qsTr("Strict reduced x87 precision") + config: "X87StrictReducedPrecision" + enabled: 
x87ReducedPrecisionCheckbox.checked + } + ConfigCheckBox { text: qsTr("Disable JIT optimization passes") config: "O0" diff --git a/unittests/32Bit_ASM/CMakeLists.txt b/unittests/32Bit_ASM/CMakeLists.txt index 187a5a6752..ea434848d3 100644 --- a/unittests/32Bit_ASM/CMakeLists.txt +++ b/unittests/32Bit_ASM/CMakeLists.txt @@ -38,7 +38,7 @@ foreach(ASM_SRC ${ASM_SOURCES}) add_custom_command(OUTPUT ${OUTPUT_NAME} DEPENDS "${TMP_FILE}" - COMMAND "nasm" ARGS "${TMP_FILE}" "-o" "${OUTPUT_NAME}") + COMMAND "nasm" ARGS "-i" "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Includes/" "${TMP_FILE}" "-o" "${OUTPUT_NAME}") add_custom_command(OUTPUT ${OUTPUT_CONFIG_NAME} DEPENDS "${ASM_SRC}" diff --git a/unittests/32Bit_ASM/Includes/nan_test_macros.inc b/unittests/32Bit_ASM/Includes/nan_test_macros.inc new file mode 100644 index 0000000000..c730fc915e --- /dev/null +++ b/unittests/32Bit_ASM/Includes/nan_test_macros.inc @@ -0,0 +1,130 @@ +; NaN Testing Macros for 32-bit Assembly Tests +; Implements NaN triple testing system: +; - Bit 2: 1 if value is NaN +; - Bit 1: 1 if quiet NaN +; - Bit 0: 1 if signaling NaN +; +; Triple values: +; 0b000 (0): Not a NaN +; 0b101 (5): Signaling NaN +; 0b110 (6): Quiet NaN +; +; ASSUMPTION: All input pointers (edx) are valid and non-null + +; Macro: CHECK_NAN_TRIPLE_32 +; Checks 32-bit float NaN classification and returns triple in EAX +; Input: 32-bit float value in xmm0 +; Output: NaN triple in EAX (bits 2:0) +%macro CHECK_NAN_TRIPLE_32 0 + push ecx + push esi + push edx + + xor eax, eax + ucomiss xmm0, xmm0 + setp al + mov ecx, eax + shl ecx, 2 + + ; Extract and check quiet bit (bit 22) + movd edx, xmm0 + and edx, 0x00400000 + mov esi, edx + shr esi, 22 + and esi, eax + and esi, 1 + shl esi, 1 + add ecx, esi + + ; Check for signaling NaN (NaN but not quiet) + test edx, edx + sete dl + and dl, al + movzx eax, dl + or eax, ecx + + pop edx + pop esi + pop ecx +%endmacro + +; Macro: CHECK_NAN_TRIPLE_64 +; Checks 64-bit double NaN classification and returns 
triple in EAX +; Input: 64-bit double value should be pre-stored at [edx] by caller +; Output: NaN triple in EAX (bits 2:0) +%macro CHECK_NAN_TRIPLE_64 0 + push ebx + push esi + sub esp, 12 + + ; Load 64-bit double and use SSE for NaN comparison + movsd xmm0, qword [edx] + xor eax, eax + ucomisd xmm0, xmm0 + setp al + + mov ecx, eax + movsd qword [esp], xmm0 + mov edx, 524288 + and edx, [esp + 4] + shl ecx, 2 + + ; Extract quiet bit (bit 51) + mov ebx, edx + shr ebx, 19 + and bl, al + movzx esi, bl + lea ecx, [ecx + 2*esi] + + ; Check for signaling NaN (NaN but not quiet) + test edx, edx + sete dl + and dl, al + movzx eax, dl + or eax, ecx + + add esp, 12 + pop esi + pop ebx +%endmacro + +; Macro: CHECK_NAN_TRIPLE_80 +; Checks 80-bit extended precision NaN classification and returns triple in EAX +; Input: 80-bit extended precision value in memory at [eax] (10 bytes) +; Output: NaN triple in EAX (bits 2:0) +%macro CHECK_NAN_TRIPLE_80 0 + push ebx + push esi + sub esp, 20 + + ; Load the 80-bit value and store copy for bit manipulation + fld tword [eax] + fld st0 + fstp tword [esp] + + ; Get bits 63:32 from stored significand + mov ecx, [esp + 4] + xor eax, eax + fucomip st0 + setp al + mov edx, eax + shl edx, 2 + + ; Extract quiet bit (bit 30 in high dword) + mov ebx, ecx + shr ebx, 30 + and bl, al + movzx esi, bl + lea edx, [edx + 2*esi] + + ; Check for signaling NaN using bt instruction + bt ecx, 30 + setae cl + and cl, al + movzx eax, cl + or eax, edx + + add esp, 20 + pop esi + pop ebx +%endmacro diff --git a/unittests/32Bit_ASM/X87/x87_32bit_qnan_preservation.asm b/unittests/32Bit_ASM/X87/x87_32bit_qnan_preservation.asm new file mode 100644 index 0000000000..2006ba0cfe --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_32bit_qnan_preservation.asm @@ -0,0 +1,36 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 quiet NaN preservation in non-reduced precision mode 
(32-bit) +; This test verifies that FLDT loads a quiet NaN and preserves its nature +; We test that loading a quiet nan preserves it +; that then storing it as 32bit, keeps it as a quiet nan. +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea edx, [.data] +fld tword [edx] ; load qnan 80bit +fstp dword [edx + 16] ; store qnan as 32bit + +; Check the stored 32-bit value using NaN triple macro +lea edx, [.data + 16] +movss xmm0, [edx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 +.data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + dd 0 ; space for 32-bit result diff --git a/unittests/32Bit_ASM/X87/x87_32bit_snan_preservation.asm b/unittests/32Bit_ASM/X87/x87_32bit_snan_preservation.asm new file mode 100644 index 0000000000..9fb4f363fb --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_32bit_snan_preservation.asm @@ -0,0 +1,36 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in non-reduced precision mode (32-bit) +; This test verifies that FLDT loads a signaling NaN and DOES NOT preserve its signaling nature +; We test that loading a signaling nan preserves it but +; that then storing it as 32bit, transforms it to a quiet nan. 
+; Returns NaN triple: 6 (0b110) for quiet NaN (converted from signaling) + +finit +lea edx, [.data] +fld tword [edx] ; load snan +fstp dword [edx + 16] ; store snan as 32bit qnan + +; Check the stored 32-bit value using NaN triple macro +lea edx, [.data + 16] +movss xmm0, [edx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 +.data: + dq 0xa000000000000000 ; signaling NaN significand + dw 0x7fff ; signaling NaN exponent + dd 0 ; space for 32-bit result diff --git a/unittests/32Bit_ASM/X87/x87_neg_snan_roundtrip.asm b/unittests/32Bit_ASM/X87/x87_neg_snan_roundtrip.asm new file mode 100644 index 0000000000..d7e17556da --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_neg_snan_roundtrip.asm @@ -0,0 +1,34 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling negative NaN round-trip preservation in non-reduced precision mode (32-bit) +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +lea edx, [.data] +fld tword [edx] ; load snan 80bit +fstp tword [edx + 16] ; store nan as 80bit + +; Check the stored 80-bit value using NaN triple macro +lea eax, [.data + 16] +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +.data: + dq 0xa000000000000000 ; signaling nan significand + dw 0xffff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) diff --git a/unittests/32Bit_ASM/X87/x87_qnan_preservation.asm b/unittests/32Bit_ASM/X87/x87_qnan_preservation.asm new file mode 100644 index 0000000000..6e765993b8 --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_qnan_preservation.asm @@ -0,0 +1,34 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 quiet NaN preservation in non-reduced precision mode (32-bit) +; This test 
verifies that quiet NaNs remain quiet during conversion +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea edx, [.data] +fld tword [edx] ; load qnan 80bit +fstp qword [edx + 16] ; store qnan as 64bit + +; Check the stored 64-bit value using NaN triple macro +lea edx, [.data + 16] +movsd xmm0, [edx] ; Load 64-bit double into xmm0 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +.data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + dq 0 ; space for 64-bit result diff --git a/unittests/32Bit_ASM/X87/x87_snan_preservation.asm b/unittests/32Bit_ASM/X87/x87_snan_preservation.asm new file mode 100644 index 0000000000..c431aabe59 --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_snan_preservation.asm @@ -0,0 +1,36 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in non-reduced precision mode (32-bit) +; This test verifies that FLDT loads a signaling NaN and DOES NOT preserve its signaling nature +; We test that loading a signaling nan preserves it but +; that then storing it as 64bit, transforms it to a quiet nan. 
+; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +mov edx, .data +fld tword [edx] ; load snan +fstp qword [edx + 16] ; store snan as 64bit qnan + +; Check the stored 64-bit value using NaN triple macro +mov edx, .data +add edx, 16 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +.data: + dq 0xa000000000000000 ; signaling NaN significand + dw 0x7fff ; signaling NaN exponent + dq 0 ; space for 64-bit result diff --git a/unittests/32Bit_ASM/X87/x87_snan_roundtrip.asm b/unittests/32Bit_ASM/X87/x87_snan_roundtrip.asm new file mode 100644 index 0000000000..3402f589fe --- /dev/null +++ b/unittests/32Bit_ASM/X87/x87_snan_roundtrip.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + }, + "Mode": "32BIT" +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN round-trip preservation in non-reduced precision mode (32-bit) +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +mov edx, .data +fld tword [edx] ; load nan 80bit +fstp tword [edx + 16] ; store nan as 80bit + +; Check the stored 80-bit value using NaN triple macro +mov eax, edx +add eax, 16 +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +.data: + dq 0xa000000000000000 ; signaling nan significand + dw 0x7fff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) diff --git a/unittests/ASM/Includes/nan_test_macros.inc b/unittests/ASM/Includes/nan_test_macros.inc new file mode 100644 index 0000000000..fc623dbe91 --- /dev/null +++ b/unittests/ASM/Includes/nan_test_macros.inc @@ -0,0 +1,128 @@ +; NaN Testing Macros for Assembly Tests +; Implements NaN triple testing system: +; - Bit 2: 1 if value is NaN +; - Bit 1: 1 if quiet NaN +; - Bit 0: 1 if signaling NaN +; +; Triple values: +; 0b000 (0): Not a NaN +; 0b101 (5): Signaling NaN +; 0b110 (6): Quiet NaN +; +; ASSUMPTION: All input pointers (rdx) are valid and non-null + +; 
Macro: CHECK_NAN_TRIPLE_32
+; Checks 32-bit float NaN classification and returns triple in EAX
+; Input: 32-bit float value in xmm0
+; Output: NaN triple in EAX (bits 2:0)
+%macro CHECK_NAN_TRIPLE_32 0
+  push rcx
+  push rsi
+  push rdx
+
+  xor eax, eax
+  ucomiss xmm0, xmm0
+  setp al
+  lea rcx, [4*rax]
+
+  ; Extract and check quiet bit (bit 22)
+  movd edx, xmm0
+  and edx, 0x00400000
+  mov esi, edx
+  shr esi, 22
+  and sil, al
+  movzx esi, sil
+  lea rcx, [rcx + 2*rsi]
+
+  ; Check for signaling NaN (NaN but not quiet)
+  test edx, edx
+  sete dl
+  and dl, al
+  movzx eax, dl
+  or eax, ecx
+
+  pop rdx
+  pop rsi
+  pop rcx
+%endmacro
+
+; Macro: CHECK_NAN_TRIPLE_64
+; Checks 64-bit double NaN classification and returns triple in RAX
+; Input: 64-bit double value in xmm0
+; Output: NaN triple in RAX (bits 2:0)
+%macro CHECK_NAN_TRIPLE_64 0
+  push rcx
+  push rsi
+  push rdx
+
+  xor eax, eax
+  ucomisd xmm0, xmm0
+  setp al
+  lea rcx, [4*rax]
+
+  ; Extract and check quiet bit (bit 51)
+  movq rdx, xmm0
+  mov rsi, 0x0008000000000000
+  and rsi, rdx
+  mov rdx, rsi
+  shr rdx, 51
+  and dl, al
+  movzx rdx, dl
+  lea rcx, [rcx + 2*rdx]
+
+  ; Check for signaling NaN (NaN but not quiet)
+  test rsi, rsi
+  sete dl
+  and dl, al
+  movzx eax, dl
+  or eax, ecx
+
+  pop rdx
+  pop rsi
+  pop rcx
+%endmacro
+
+; Macro: CHECK_NAN_TRIPLE_80
+; Checks 80-bit extended precision NaN classification and returns triple in RAX
+; Input: 80-bit extended precision value in memory at [rax] (10 bytes)
+; Output: NaN triple in RAX (bits 2:0)
+%macro CHECK_NAN_TRIPLE_80 0
+  push rcx
+  push rdx
+  push rsi
+
+  ; Load the 80-bit value twice: one copy is spilled for bit tests, one is self-compared
+  fld tword [rax]
+  fld tword [rax]
+
+  ; Store one copy to memory for bit manipulation
+  sub rsp, 16
+  fstp tword [rsp]
+
+  ; Self-compare the remaining copy (ST1 is empty here): PF=1 only for NaN; fucomip also pops it
+  xor eax, eax
+  fucomip st0, st0
+  setp al
+  lea rdx, [4*rax]
+
+  ; Extract and check quiet bit (bit 62)
+  mov rcx, 0x4000000000000000
+  and rcx, [rsp]
+  mov rsi, rcx
+  shr rsi, 62
+  and sil, al
+  movzx rsi, sil
+  lea rdx, [rdx + 2*rsi]
+
+  ; Check for signaling NaN (NaN but not quiet)
+  test rcx, rcx
+  sete cl
+  and cl, al
+  movzx eax, cl
+  or eax, edx
+
+  add rsp, 16
+  pop rsi
+  pop rdx
+  pop rcx
+%endmacro
diff --git a/unittests/ASM/X87/x87_32bit_high_bit_value_test.asm b/unittests/ASM/X87/x87_32bit_high_bit_value_test.asm
new file mode 100644
index 0000000000..772587535b
--- /dev/null
+++ b/unittests/ASM/X87/x87_32bit_high_bit_value_test.asm
@@ -0,0 +1,30 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "XMM0": ["0x7F7FFFFF", "0", "0", "0"]
+  }
+}
+%endif
+
+mov rsp, 0xe0000040
+
+; Test a 32-bit value with many high bits set (max normal float)
+; This could be misinterpreted as having NaN-like properties
+; when treated as 64-bit, potentially triggering incorrect ORing.
+
+finit
+lea rdx, [rel data]
+fld dword [rdx] ; Load max normal 32-bit float
+fstp dword [rdx + 16] ; Store back as 32-bit
+
+; Verify the value is unchanged
+movss xmm0, [rdx + 16]
+
+hlt
+
+align 4096
+data:
+  dd 0x7F7FFFFF ; Max normal 32-bit float
+  dd 0
+  dd 0
+  dd 0 ; space for result
diff --git a/unittests/ASM/X87/x87_32bit_qnan_preservation.asm b/unittests/ASM/X87/x87_32bit_qnan_preservation.asm
new file mode 100644
index 0000000000..f2c916544c
--- /dev/null
+++ b/unittests/ASM/X87/x87_32bit_qnan_preservation.asm
@@ -0,0 +1,35 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "RAX": "6"
+  }
+}
+%endif
+
+%include "nan_test_macros.inc"
+
+mov rsp, 0xe0000040
+
+; Test x87 quiet NaN preservation in non-reduced precision mode
+; This test verifies that FLDT loads a quiet NaN and preserves its nature
+; We test that loading a quiet nan preserves it
+; that then storing it as 32bit, keeps it as a quiet nan.
+; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load qnan 80bit +fstp dword [rdx + 16] ; store qnan as 32bit + +; Check the stored 32-bit value using NaN triple macro +lea rdx, [rel data + 16] +movss xmm0, [rdx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 +data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + dd 0 ; space for 32-bit result diff --git a/unittests/ASM/X87/x87_32bit_snan_bit_position_test.asm b/unittests/ASM/X87/x87_32bit_snan_bit_position_test.asm new file mode 100644 index 0000000000..668108dcde --- /dev/null +++ b/unittests/ASM/X87/x87_32bit_snan_bit_position_test.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test that a 32-bit signaling NaN gets bit 22 set (not bit 51) +; A 32-bit sNaN has all exponent bits set and bit 22 clear +; After silencing, bit 22 should be set, making it a qNaN +; Value: 0x7F800001 (sNaN) should become 0x7FC00001 (qNaN with bit 22 set) + +finit +lea rdx, [rel data] +fld dword [rdx] ; Load 32-bit sNaN +fstp dword [rdx + 16] ; Store back - should silence to qNaN + +; Load the result and check bit 22 is set +lea rdx, [rdx + 16] +movss xmm0, [rdx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 +data: + dd 0x7F800001 ; 32-bit signaling NaN (exp=0xFF, frac=1, bit 22 clear) + dd 0 + dd 0 + dd 0 ; space for result diff --git a/unittests/ASM/X87/x87_32bit_snan_full_precision_store.asm b/unittests/ASM/X87/x87_32bit_snan_full_precision_store.asm new file mode 100644 index 0000000000..f8852f4142 --- /dev/null +++ b/unittests/ASM/X87/x87_32bit_snan_full_precision_store.asm @@ -0,0 +1,40 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 signaling NaN silencing in FULL precision mode with 32-bit store +; This test verifies that 
storing an 80-bit signaling NaN as 32-bit properly +; silences it by setting bit 22 (the quiet bit for 32-bit floats). +; +; The bug: SilenceNaN was hardcoded for 64-bit, using: +; - 64-bit FCmp (won't detect 32-bit NaN as NaN) +; - bit 51 instead of bit 22 +; This caused signaling NaNs to remain signaling after 32-bit stores. +; +; Returns NaN triple: 6 (0b110) for quiet NaN (should be converted from signaling) + +finit +lea rdx, [rel data] +fld tword [rdx] ; Load 80-bit signaling NaN +fstp dword [rdx + 16] ; Store as 32-bit - should silence it + +; Check the stored 32-bit value using NaN triple macro +lea rdx, [rel data + 16] +movss xmm0, [rdx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 ; Should return 6 (quiet NaN) + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling NaN significand (bit 62 clear) + dw 0x7fff ; signaling NaN exponent (all 1s) + dd 0 ; space for 32-bit result diff --git a/unittests/ASM/X87/x87_32bit_snan_preservation.asm b/unittests/ASM/X87/x87_32bit_snan_preservation.asm new file mode 100644 index 0000000000..8b874235d8 --- /dev/null +++ b/unittests/ASM/X87/x87_32bit_snan_preservation.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in non-reduced precision mode +; This test verifies that FLDT loads a signaling NaN and DOES NOT preserve its signaling nature +; We test that loading a signaling nan preserves it but +; that then storing it as 32bit, transforms it to a quiet nan. 
+; Returns NaN triple: 6 (0b110) for quiet NaN (converted from signaling)
+
+finit
+lea rdx, [rel data]
+fld tword [rdx] ; load snan
+fstp dword [rdx + 16] ; store snan as 32bit qnan
+
+; Check the stored 32-bit value using NaN triple macro
+lea rdx, [rel data + 16]
+movss xmm0, [rdx] ; Load 32-bit float into xmm0
+CHECK_NAN_TRIPLE_32
+
+hlt
+
+align 4096
+data:
+  dq 0xa000000000000000 ; signaling NaN significand
+  dw 0x7fff ; signaling NaN exponent
+  dd 0 ; space for 32-bit result
diff --git a/unittests/ASM/X87/x87_32bit_snan_stack_move.asm b/unittests/ASM/X87/x87_32bit_snan_stack_move.asm
new file mode 100644
index 0000000000..33a18e332a
--- /dev/null
+++ b/unittests/ASM/X87/x87_32bit_snan_stack_move.asm
@@ -0,0 +1,47 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "RAX": "6"
+  }
+}
+%endif
+
+%include "nan_test_macros.inc"
+
+mov rsp, 0xe0000040
+
+; Test x87 signaling NaN silencing by moving through the stack
+; Load sNaN, push another value to force stack use, then store the sNaN
+; This ensures x87 stack operations are used without arithmetic that would silence
+
+finit
+lea rdx, [rel data]
+
+; Load the sNaN
+fld dword [rdx] ; ST(0) = sNaN
+
+; Push another value to force stack management
+fld1 ; ST(0) = 1.0, ST(1) = sNaN
+
+; Exchange to get sNaN back on top
+fxch st1 ; ST(0) = sNaN, ST(1) = 1.0
+
+; Pop the extra value
+fstp st1 ; ST(0) = sNaN (ST(0) copied over ST(1), then the stack top is popped)
+
+; Store the sNaN - should be silenced to qNaN
+fstp dword [rdx + 16]
+
+; Load the result and check bit 22 is set
+lea rdx, [rdx + 16]
+movss xmm0, [rdx] ; Load 32-bit float into xmm0
+CHECK_NAN_TRIPLE_32
+
+hlt
+
+align 4096
+data:
+  dd 0x7F800001 ; 32-bit signaling NaN
+  dd 0
+  dd 0
+  dd 0 ; space for result
diff --git a/unittests/ASM/X87/x87_32bit_snan_with_operation.asm b/unittests/ASM/X87/x87_32bit_snan_with_operation.asm
new file mode 100644
index 0000000000..a6fb8f8283
--- /dev/null
+++ b/unittests/ASM/X87/x87_32bit_snan_with_operation.asm
@@ -0,0 +1,34 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "RAX": "0x7FC00001"
+  }
+}
+%endif
+
+mov rsp, 0xe0000040
+
+; Test x87 signaling NaN silencing with an operation in between
+; This forces the use of x87 stack operations instead of direct load/store
+; A 32-bit sNaN has all exponent bits set and bit 22 clear
+; After an operation and store, bit 22 should be set, making it a qNaN
+; Value: 0x7F800001 (sNaN) should become 0x7FC00001 (qNaN with bit 22 set)
+
+finit
+lea rdx, [rel data]
+fld dword [rdx] ; Load 32-bit sNaN onto x87 stack
+fld dword [rdx] ; Load it again - now we have st0=sNaN, st1=sNaN
+fadd ; Add them - the result is a quiet NaN carrying the input payload (arithmetic never yields an sNaN)
+fstp dword [rdx + 16] ; Store result - must be a qNaN
+
+; Load the raw 32-bit result; the harness compares RAX against 0x7FC00001
+mov eax, [rdx + 16]
+
+hlt
+
+align 4096
+data:
+  dd 0x7F800001 ; 32-bit signaling NaN (exp=0xFF, frac=1, bit 22 clear)
+  dd 0
+  dd 0
+  dd 0 ; space for result
diff --git a/unittests/ASM/X87/x87_32bit_value_corruption_test.asm b/unittests/ASM/X87/x87_32bit_value_corruption_test.asm
new file mode 100644
index 0000000000..9b50081011
--- /dev/null
+++ b/unittests/ASM/X87/x87_32bit_value_corruption_test.asm
@@ -0,0 +1,31 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "XMM0": ["0x3F800000", "0", "0", "0"]
+  }
+}
+%endif
+
+mov rsp, 0xe0000040
+
+; Test that a simple 32-bit value (1.0) doesn't get corrupted
+; when stored through the x87 stack in full precision mode.
+; The buggy SilenceNaN extracts 64 bits and ORs with bit 51,
+; which could corrupt the value.
+ +finit +lea rdx, [rel data] +fld dword [rdx] ; Load 1.0 as 32-bit +fstp dword [rdx + 16] ; Store back as 32-bit + +; Load result and verify it's still 1.0 +movss xmm0, [rdx + 16] + +hlt + +align 4096 +data: + dd 0x3F800000 ; 1.0 in 32-bit float + dd 0 + dd 0 + dd 0 ; space for result diff --git a/unittests/ASM/X87/x87_neg_snan_roundtrip.asm b/unittests/ASM/X87/x87_neg_snan_roundtrip.asm new file mode 100644 index 0000000000..2f8af54aba --- /dev/null +++ b/unittests/ASM/X87/x87_neg_snan_roundtrip.asm @@ -0,0 +1,33 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 signaling negative NaN round-trip preservation in non-reduced precision mode +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load snan 80bit +fstp tword [rdx + 16] ; store nan as 80bit + +; Check the stored 80-bit value using NaN triple macro +lea rax, [rel data + 16] +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling nan significand + dw 0xffff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) diff --git a/unittests/ASM/X87/x87_qnan_preservation.asm b/unittests/ASM/X87/x87_qnan_preservation.asm new file mode 100644 index 0000000000..c33c7ece29 --- /dev/null +++ b/unittests/ASM/X87/x87_qnan_preservation.asm @@ -0,0 +1,33 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 quiet NaN preservation in non-reduced precision mode +; This test verifies that quiet NaNs remain quiet during conversion +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load qnan 80bit +fstp qword [rdx + 16] ; store qnan as 64bit + +; Check the stored 64-bit value using NaN triple macro +lea rdx, [rel 
data + 16] +movsd xmm0, [rdx] ; Load 64-bit double into xmm0 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + dq 0 ; space for 64-bit result diff --git a/unittests/ASM/X87/x87_snan_preservation.asm b/unittests/ASM/X87/x87_snan_preservation.asm new file mode 100644 index 0000000000..c6710917c8 --- /dev/null +++ b/unittests/ASM/X87/x87_snan_preservation.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in non-reduced precision mode +; This test verifies that FLDT loads a signaling NaN and DOES NOT preserve its signaling nature once it is stored: +; the FLDT load itself keeps the nan signaling, but +; storing it as 64bit then transforms it into a quiet nan. +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load snan +fstp qword [rdx + 16] ; store snan as 64bit qnan + +; Check the stored 64-bit value using NaN triple macro +lea rdx, [rel data + 16] +movsd xmm0, [rdx] ; Load 64-bit double into xmm0 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling NaN significand + dw 0x7fff ; signaling NaN exponent + dq 0 ; space for 64-bit result diff --git a/unittests/ASM/X87/x87_snan_roundtrip.asm b/unittests/ASM/X87/x87_snan_roundtrip.asm new file mode 100644 index 0000000000..a16b035b12 --- /dev/null +++ b/unittests/ASM/X87/x87_snan_roundtrip.asm @@ -0,0 +1,33 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + } +} +%endif + +%include "nan_test_macros.inc" + +mov rsp, 0xe0000040 + +; Test x87 signaling NaN round-trip preservation in non-reduced precision mode +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load nan 80bit +fstp tword [rdx + 16] ; store nan as 80bit + +;
Check the stored 80-bit value using NaN triple macro +lea rax, [rel data + 16] +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling nan significand + dw 0x7fff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) diff --git a/unittests/ASM/X87_F64/x87_32bit_qnan_preservation_F64.asm b/unittests/ASM/X87_F64/x87_32bit_qnan_preservation_F64.asm new file mode 100644 index 0000000000..6ac4e0721f --- /dev/null +++ b/unittests/ASM/X87_F64/x87_32bit_qnan_preservation_F64.asm @@ -0,0 +1,42 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Env": { "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +section .text +global _start +_start: + +mov esp, 0xe0000040 + +; Test x87 quiet NaN preservation in reduced precision mode +; This test verifies that quiet NaNs remain quiet during conversion +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel qnan_data] +fld tword [rdx] ; load qnan 80bit +lea rdx, [rel result] +fstp dword [rdx] ; store qnan as 32bit + +; Check the stored 32-bit value using NaN triple macro +movss xmm0, [rdx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 + +section .data + align 16 + qnan_data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + result: + dd 0 ; space for 32-bit result diff --git a/unittests/ASM/X87_F64/x87_32bit_snan_preservation_F64.asm b/unittests/ASM/X87_F64/x87_32bit_snan_preservation_F64.asm new file mode 100644 index 0000000000..740bf99ff8 --- /dev/null +++ b/unittests/ASM/X87_F64/x87_32bit_snan_preservation_F64.asm @@ -0,0 +1,37 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Env": { "FEX_X87REDUCEDPRECISION" : "1", "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in reduced precision mode +; This test verifies that FLDT loads a 
signaling NaN and DOES NOT preserve its signaling nature once it is stored: +; the FLDT load itself keeps the nan signaling, but +; storing it as 32bit then transforms it into a quiet nan. +; Returns NaN triple: 6 (0b110) for quiet NaN (converted from signaling) + +finit +lea rdx, [rel data] +fld tword [rdx] ; load snan +fstp dword [rdx + 16] ; store snan as 32bit qnan + +; Check the stored 32-bit value using NaN triple macro +lea rdx, [rel data + 16] +movss xmm0, [rdx] ; Load 32-bit float into xmm0 +CHECK_NAN_TRIPLE_32 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling NaN significand + dw 0x7fff ; signaling NaN exponent + dd 0 ; space for 32-bit result + diff --git a/unittests/ASM/X87_F64/x87_neg_snan_roundtrip_F64.asm b/unittests/ASM/X87_F64/x87_neg_snan_roundtrip_F64.asm new file mode 100644 index 0000000000..da10d2449e --- /dev/null +++ b/unittests/ASM/X87_F64/x87_neg_snan_roundtrip_F64.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + }, + "Env": { "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling negative NaN round-trip preservation in reduced precision mode +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load snan 80bit +fstp tword [rdx + 16] ; store nan as 80bit + +; Check the stored 80-bit value using NaN triple macro +lea rax, [rel data + 16] +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling nan significand + dw 0xffff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) + diff --git a/unittests/ASM/X87_F64/x87_qnan_preservation_F64.asm b/unittests/ASM/X87_F64/x87_qnan_preservation_F64.asm new file mode 100644 index 0000000000..c24c337fb9 --- /dev/null +++ b/unittests/ASM/X87_F64/x87_qnan_preservation_F64.asm @@ -0,0 +1,35 @@ +%ifdef
CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Env": { "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 quiet NaN preservation in reduced precision mode +; This test verifies that quiet NaNs remain quiet during conversion +; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load qnan 80bit +fstp qword [rdx + 16] ; store qnan as 64bit + +; Check the stored 64-bit value using NaN triple macro +lea rdx, [rel data + 16] +movsd xmm0, [rdx] ; Load 64-bit double into xmm0 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +data: + dq 0xc000000000000000 ; quiet NaN significand + dw 0x7fff ; NaN exponent + dq 0 ; space for 64-bit result + diff --git a/unittests/ASM/X87_F64/x87_snan_preservation_F64.asm b/unittests/ASM/X87_F64/x87_snan_preservation_F64.asm new file mode 100644 index 0000000000..c54dbdcac2 --- /dev/null +++ b/unittests/ASM/X87_F64/x87_snan_preservation_F64.asm @@ -0,0 +1,37 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "6" + }, + "Env": { "FEX_X87REDUCEDPRECISION" : "1", "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN non-preservation in reduced precision mode +; This test verifies that FLDT loads a signaling NaN and DOES NOT preserve its signaling nature once it is stored: +; the FLDT load itself keeps the nan signaling, but +; storing it as 64bit then transforms it into a quiet nan.
+; Returns NaN triple: 6 (0b110) for quiet NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load snan +fstp qword [rdx + 16] ; store snan as 64bit qnan + +; Check the stored 64-bit value using NaN triple macro +lea rdx, [rel data + 16] +movsd xmm0, [rdx] ; Load 64-bit double into xmm0 +CHECK_NAN_TRIPLE_64 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling NaN significand + dw 0x7fff ; signaling NaN exponent + dq 0 ; space for 64-bit result + diff --git a/unittests/ASM/X87_F64/x87_snan_roundtrip_F64.asm b/unittests/ASM/X87_F64/x87_snan_roundtrip_F64.asm new file mode 100644 index 0000000000..4ec82da808 --- /dev/null +++ b/unittests/ASM/X87_F64/x87_snan_roundtrip_F64.asm @@ -0,0 +1,35 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "5" + }, + "Env": { "FEX_X87STRICTREDUCEDPRECISION" : "1" } +} +%endif + +%include "nan_test_macros.inc" + +mov esp, 0xe0000040 + +; Test x87 signaling NaN round-trip preservation in reduced precision mode +; This test verifies that FLDT -> FSTPT preserves signaling nan across round-trip +; Returns NaN triple: 5 (0b101) for signaling NaN + +finit +lea rdx, [rel data] +fld tword [rdx] ; load nan 80bit +fstp tword [rdx + 16] ; store nan as 80bit + +; Check the stored 80-bit value using NaN triple macro +lea rax, [rel data + 16] +CHECK_NAN_TRIPLE_80 + +hlt + +align 4096 +data: + dq 0xa000000000000000 ; signaling nan significand + dw 0x7fff ; signaling nan exponent + dw 0, 0, 0 ; padding to 16 bytes + dq 0, 0 ; space for 80-bit result (16 bytes) + diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json index b056cc6304..ff00093b7a 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json @@ -134,7 +134,7 @@ }, "Psychonauts matrix swizzle": { "x86InstructionCount": 103, - "ExpectedInstructionCount": 113, + "ExpectedInstructionCount": 242, "Comment": [ "Hottest block in Windows 
Psychonauts", "Doing a 4x4 32-bit float matrix swizzle", @@ -254,100 +254,229 @@ "stur w7, [x9, #-68]", "ldur w4, [x9, #-68]", "ldr s2, [x4]", - "stur s2, [x9, #-64]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-64]", "ldur w7, [x9, #-68]", "ldr s2, [x7, #16]", - "stur s2, [x9, #-60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-60]", "ldur w5, [x9, #-68]", "ldr s2, [x5, #32]", - "stur s2, [x9, #-56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-56]", "ldur w4, [x9, #-68]", "ldr s2, [x4, #48]", - "stur s2, [x9, #-52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-52]", "ldur w7, [x9, #-68]", "ldr s2, [x7, #4]", - "stur s2, [x9, #-48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-48]", "ldur w5, [x9, #-68]", "ldr s2, [x5, #20]", - "stur s2, [x9, #-44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-44]", "ldur w4, [x9, #-68]", "ldr s2, [x4, #36]", - "stur s2, [x9, #-40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-40]", "ldur w7, [x9, #-68]", "ldr s2, [x7, #52]", - "stur s2, [x9, #-36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-36]", "ldur w5, [x9, #-68]", "ldr s2, [x5, #8]", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldur w4, [x9, #-68]", "ldr s2, [x4, #24]", - "stur s2, [x9, #-28]", 
+ "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldur w7, [x9, #-68]", "ldr s2, [x7, #40]", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldur w5, [x9, #-68]", "ldr s2, [x5, #56]", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldur w4, [x9, #-68]", "ldr s2, [x4, #12]", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldur w7, [x9, #-68]", "ldr s2, [x7, #28]", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldur w5, [x9, #-68]", "ldr s2, [x5, #44]", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldur w4, [x9, #-68]", "ldr s2, [x4, #60]", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-64]", - "str s2, [x7]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7]", "ldr w5, [x9, #8]", "ldur s2, [x9, #-60]", - "str s2, [x5, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #4]", "ldr w4, [x9, #8]", "ldur s2, [x9, #-56]", - "str s2, [x4, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #8]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-52]", - "str s2, [x7, #12]", + "fmov s3, w20", + 
"fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #12]", "ldr w5, [x9, #8]", "ldur s2, [x9, #-48]", - "str s2, [x5, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #16]", "ldr w4, [x9, #8]", "ldur s2, [x9, #-44]", - "str s2, [x4, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #20]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-40]", - "str s2, [x7, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #24]", "ldr w5, [x9, #8]", "ldur s2, [x9, #-36]", - "str s2, [x5, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #28]", "ldr w4, [x9, #8]", "ldur s2, [x9, #-32]", - "str s2, [x4, #32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #32]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-28]", - "str s2, [x7, #36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #36]", "ldr w5, [x9, #8]", "ldur s2, [x9, #-24]", - "str s2, [x5, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #40]", "ldr w4, [x9, #8]", "ldur s2, [x9, #-20]", - "str s2, [x4, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #44]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-16]", - "str s2, [x7, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #48]", "ldr w5, [x9, #8]", "ldur s2, [x9, #-12]", - "str s2, [x5, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr 
v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #52]", "ldr w4, [x9, #8]", "ldur s2, [x9, #-8]", - "str s2, [x4, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #56]", "ldr w7, [x9, #8]", "ldur s2, [x9, #-4]", - "str s2, [x7, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #60]", "ldr w4, [x9, #8]", "mov x8, x9", "ldr w9, [x8], #4", diff --git a/unittests/InstructionCountCI/FlagM/x87-Crysis2Max-fmodel.json b/unittests/InstructionCountCI/FlagM/x87-Crysis2Max-fmodel.json index 35e8981e28..147b1006e6 100644 --- a/unittests/InstructionCountCI/FlagM/x87-Crysis2Max-fmodel.json +++ b/unittests/InstructionCountCI/FlagM/x87-Crysis2Max-fmodel.json @@ -865,7 +865,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-128]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-128]", "ldr s2, [x4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -913,7 +918,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr s2, [x4, #120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -945,7 +954,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-124]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-124]", "ldr s2, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -993,7 +1006,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr s2, [x4, #116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ 
-1025,7 +1042,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-120]", "ldr s2, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1073,7 +1094,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldr s2, [x4, #112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1105,7 +1130,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-116]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-116]", "ldr s2, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1153,7 +1182,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldr s2, [x4, #108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1185,7 +1218,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-112]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-112]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1233,7 +1270,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldr s2, [x4, #104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1265,7 +1306,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-108]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-108]", "ldr s2, [x4, #20]", "str x30, [sp, 
#-16]!", "fmov s0, s2", @@ -1313,7 +1358,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldr s2, [x4, #100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1345,7 +1394,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-104]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-104]", "ldr s2, [x4, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1393,7 +1446,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldr s2, [x4, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1425,7 +1482,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-100]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-100]", "ldr s2, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1473,7 +1534,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldr s2, [x4, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1505,7 +1570,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-96]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-96]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1553,7 +1622,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-36]", "ldr s2, [x4, #88]", 
"str x30, [sp, #-16]!", "fmov s0, s2", @@ -1585,7 +1658,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-92]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-92]", "ldr s2, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1633,7 +1710,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-40]", "ldr s2, [x4, #84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1665,7 +1746,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-88]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-88]", "ldr s2, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1713,7 +1798,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-44]", "ldr s2, [x4, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1745,7 +1834,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-84]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-84]", "ldr s2, [x4, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1793,7 +1886,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-48]", "ldr s2, [x4, #76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1825,7 +1922,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-80]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-80]", "ldr s2, 
[x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1873,7 +1974,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-52]", "ldr s2, [x4, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1905,7 +2010,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-76]", "ldr s2, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1953,7 +2062,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-56]", "ldr s2, [x4, #68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1985,7 +2098,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-72]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-72]", "ldr s2, [x4, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2033,7 +2150,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-60]", "ldr s2, [x4, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2065,7 +2186,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-68]", "ldr s2, [x4, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2090,9 +2215,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xe358", - "movk w20, #0x100d, lsl #16", - "ldr w4, [x20]", + "mov w21, #0xe358", + "movk w21, #0x100d, lsl #16", + "ldr w4, [x21]", "ldr 
s3, [x7, #60]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -2116,7 +2241,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-64]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-64]", "ldur s2, [x9, #-68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2148,7 +2277,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-256]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-256]", "ldur s2, [x9, #-128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2196,7 +2329,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-196]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-196]", "ldur s2, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2228,7 +2365,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-252]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-252]", "ldur s2, [x9, #-124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2276,7 +2417,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-200]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-200]", "ldur s2, [x9, #-76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2308,7 +2453,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-248]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-248]", "ldur s2, [x9, #-120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2356,7 +2505,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-204]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, 
v3.8b", + "stur s4, [x9, #-204]", "ldur s2, [x9, #-80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2388,7 +2541,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-244]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-244]", "ldur s2, [x9, #-116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2436,7 +2593,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-208]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-208]", "ldur s2, [x9, #-84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2468,7 +2629,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-240]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-240]", "ldur s2, [x9, #-112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2516,7 +2681,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-212]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-212]", "ldur s2, [x9, #-88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2548,7 +2717,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-236]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-236]", "ldur s2, [x9, #-108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2596,7 +2769,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-216]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-216]", "ldur s2, [x9, #-92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2628,7 +2805,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-232]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr 
v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-232]", "ldur s2, [x9, #-104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2676,7 +2857,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-220]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-220]", "ldur s2, [x9, #-96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2708,7 +2893,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-228]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-228]", "ldur s2, [x9, #-100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2756,7 +2945,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-224]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-224]", "ldur s2, [x9, #-64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2788,7 +2981,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-192]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-192]", "ldur s2, [x9, #-4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2836,7 +3033,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-132]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-132]", "ldur s2, [x9, #-60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2868,7 +3069,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-188]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-188]", "ldur s2, [x9, #-8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2916,7 +3121,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-136]", + "fmov s3, w20", 
+ "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-136]", "ldur s2, [x9, #-56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2948,7 +3157,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-184]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-184]", "ldur s2, [x9, #-12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2996,7 +3209,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-140]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-140]", "ldur s2, [x9, #-52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3028,7 +3245,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-180]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-180]", "ldur s2, [x9, #-16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3076,7 +3297,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-144]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-144]", "ldur s2, [x9, #-48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3108,7 +3333,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-176]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-176]", "ldur s2, [x9, #-20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3156,7 +3385,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-148]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-148]", "ldur s2, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3188,7 +3421,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - 
"stur s2, [x9, #-172]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-172]", "ldur s2, [x9, #-24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3236,7 +3473,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-152]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-152]", "ldur s2, [x9, #-40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3268,7 +3509,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-168]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-168]", "ldur s2, [x9, #-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3316,7 +3561,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-156]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-156]", "ldur s2, [x9, #-36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3348,7 +3597,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-164]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-164]", "ldur s2, [x9, #-32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3389,9 +3642,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xe35c", - "movk w20, #0x100d, lsl #16", - "ldr w4, [x20]", + "mov w21, #0xe35c", + "movk w21, #0x100d, lsl #16", + "ldr w4, [x21]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -3399,7 +3652,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-160]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-160]", "ldur s2, [x9, #-228]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3431,7 +3688,11 @@ "blr x0", 
"ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-128]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-128]", "ldur s2, [x9, #-256]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3479,7 +3740,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-100]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-100]", "ldur s2, [x9, #-232]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3511,7 +3776,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-124]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-124]", "ldur s2, [x9, #-252]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3559,7 +3828,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-104]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-104]", "ldur s2, [x9, #-236]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3591,7 +3864,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-120]", "ldur s2, [x9, #-248]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3639,7 +3916,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-108]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-108]", "ldur s2, [x9, #-240]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3671,7 +3952,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-116]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-116]", "ldur s2, [x9, #-244]", "str x30, [sp, #-16]!", 
"fmov s0, s2", @@ -3719,7 +4004,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-112]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-112]", "ldur s2, [x9, #-224]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3751,7 +4040,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-96]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-96]", "ldur s2, [x9, #-196]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3799,7 +4092,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-68]", "ldur s2, [x9, #-220]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3831,7 +4128,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-92]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-92]", "ldur s2, [x9, #-200]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3879,7 +4180,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-72]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-72]", "ldur s2, [x9, #-216]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3911,7 +4216,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-88]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-88]", "ldur s2, [x9, #-204]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3959,7 +4268,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-76]", "ldur s2, [x9, 
#-212]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3991,7 +4304,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-84]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-84]", "ldur s2, [x9, #-208]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4039,7 +4356,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-80]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-80]", "ldur s2, [x9, #-164]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4071,7 +4392,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-64]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-64]", "ldur s2, [x9, #-192]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4119,7 +4444,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-36]", "ldur s2, [x9, #-168]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4151,7 +4480,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-60]", "ldur s2, [x9, #-188]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4199,7 +4532,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-40]", "ldur s2, [x9, #-172]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4231,7 +4568,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur 
s4, [x9, #-56]", "ldur s2, [x9, #-184]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4279,7 +4620,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-44]", "ldur s2, [x9, #-176]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4311,7 +4656,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-52]", "ldur s2, [x9, #-180]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4359,7 +4708,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-48]", "ldur s2, [x9, #-160]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4391,7 +4744,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldur s2, [x9, #-132]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4439,7 +4796,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldur s2, [x9, #-156]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4471,7 +4832,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldur s2, [x9, #-136]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4519,7 +4884,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl 
v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldur s2, [x9, #-152]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4551,7 +4920,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldur s2, [x9, #-140]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4599,7 +4972,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldur s2, [x9, #-148]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4631,7 +5008,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldur s2, [x9, #-144]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4672,9 +5053,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xe360", - "movk w20, #0x100d, lsl #16", - "ldr w4, [x20]", + "mov w21, #0xe360", + "movk w21, #0x100d, lsl #16", + "ldr w4, [x21]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -4682,12 +5063,24 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldr s2, [x4]", - "str s2, [x9, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x9, #16]", "ldr s2, [x4, #4]", - "mov x20, #0xfffffffffffffefc", - "str s2, [x9, x20, sxtx]", + "mov x21, #0xfffffffffffffefc", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x9, x21, sxtx]", "ldur s2, [x9, #-116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4719,7 
+5112,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-256]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-256]", "ldur s2, [x9, #-128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4768,7 +5165,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-244]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-244]", "ldur s2, [x9, #-120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4800,7 +5201,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-252]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-252]", "ldur s2, [x9, #-124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4825,7 +5230,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s4, [x9, x20, sxtx]", + "ldr s4, [x9, x21, sxtx]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -4849,7 +5254,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-248]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-248]", "ldur s2, [x9, #-100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4881,7 +5290,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-240]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-240]", "ldur s2, [x9, #-100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4921,7 +5334,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-228]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-228]", "ldur s2, [x9, #-104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4953,7 +5370,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov 
s2, s0", - "stur s2, [x9, #-236]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-236]", "ldur s2, [x9, #-104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4993,7 +5414,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-232]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-232]", "ldur s2, [x9, #-84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5025,7 +5450,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-224]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-224]", "ldur s2, [x9, #-96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5065,7 +5494,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-212]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-212]", "ldur s2, [x9, #-88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5097,7 +5530,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-220]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-220]", "ldur s2, [x9, #-92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5137,7 +5574,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-216]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-216]", "ldur s2, [x9, #-68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5169,7 +5610,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-208]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-208]", "ldur s2, [x9, #-68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5209,7 +5654,11 @@ 
"blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-196]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-196]", "ldur s2, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5218,9 +5667,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xe364", - "movk w20, #0x100d, lsl #16", - "ldr w4, [x20]", + "mov w21, #0xe364", + "movk w21, #0x100d, lsl #16", + "ldr w4, [x21]", "ldur s5, [x9, #-76]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -5244,7 +5693,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-204]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-204]", "ldur s2, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5284,7 +5737,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-200]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-200]", "ldur s2, [x9, #-64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5316,7 +5773,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-192]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-192]", "ldur s2, [x9, #-64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5356,7 +5817,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-180]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-180]", "ldur s2, [x9, #-60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5388,7 +5853,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-188]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-188]", "ldur s2, [x9, #-60]", "str x30, [sp, #-16]!", "fmov 
s0, s2", @@ -5428,7 +5897,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-184]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-184]", "ldur s2, [x9, #-48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5460,7 +5933,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-176]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-176]", "ldur s2, [x9, #-36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5500,7 +5977,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-164]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-164]", "ldur s2, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5532,7 +6013,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-172]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-172]", "ldur s2, [x9, #-40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5572,7 +6057,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-168]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-168]", "ldur s2, [x9, #-32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5604,7 +6093,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-160]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-160]", "ldur s2, [x9, #-32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5644,7 +6137,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-148]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-148]", "ldur s2, [x9, 
#-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5676,7 +6173,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-156]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-156]", "ldur s2, [x9, #-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5716,7 +6217,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-152]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-152]", "ldur s2, [x9, #-16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5748,7 +6253,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-144]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "stur s6, [x9, #-144]", "ldur s2, [x9, #-4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5789,7 +6298,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-132]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "stur s5, [x9, #-132]", "ldur s2, [x9, #-12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5821,7 +6334,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-140]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "stur s5, [x9, #-140]", "ldur s2, [x9, #-8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5861,9 +6378,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-136]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-136]", "ldr s2, [x4]", - "str s2, [x9, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x9, #16]", "ldur s2, [x9, #-252]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5895,7 +6420,11 @@ "blr x0", "ldr 
x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-128]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-128]", "ldur s2, [x9, #-256]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5944,7 +6473,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-124]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-124]", "ldur s2, [x9, #-244]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5976,7 +6509,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-120]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-120]", "ldur s2, [x9, #-244]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6016,7 +6553,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-116]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-116]", "ldur s2, [x9, #-116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6048,7 +6589,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-120]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-120]", "ldur s2, [x9, #-236]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6080,7 +6625,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-112]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-112]", "ldur s2, [x9, #-240]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6120,7 +6669,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-108]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-108]", "ldur s2, [x9, #-228]", "str x30, [sp, #-16]!", "fmov 
s0, s2", @@ -6152,7 +6705,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-104]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-104]", "ldur s2, [x9, #-228]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6192,7 +6749,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-100]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-100]", "ldur s2, [x9, #-100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6224,7 +6785,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-104]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-104]", "ldur s2, [x9, #-104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6256,7 +6821,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-112]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-112]", "ldur s2, [x9, #-104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6288,7 +6857,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-104]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-104]", "ldur s2, [x9, #-100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6320,7 +6893,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-108]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-108]", "ldur s2, [x9, #-224]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6352,7 +6929,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-96]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-96]", "ldur s2, 
[x9, #-224]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6392,7 +6973,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-92]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-92]", "ldur s2, [x9, #-216]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6424,7 +7009,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-88]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-88]", "ldur s2, [x9, #-212]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6464,7 +7053,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-84]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-84]", "ldur s2, [x9, #-84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6496,7 +7089,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-88]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-88]", "ldur s2, [x9, #-208]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6528,7 +7125,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-80]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-80]", "ldur s2, [x9, #-208]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6568,7 +7169,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-76]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-76]", "ldur s2, [x9, #-200]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6600,7 +7205,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-72]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + 
"stur s5, [x9, #-72]", "ldur s2, [x9, #-196]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6640,7 +7249,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-68]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x9, #-68]", "ldur s2, [x9, #-68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6672,7 +7285,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-72]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-72]", "ldur s5, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6704,7 +7321,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-80]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-80]", "ldur s5, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6736,7 +7357,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-72]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-72]", "ldur s5, [x9, #-76]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6760,7 +7385,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-76]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-76]", "ldur s5, [x9, #-188]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6792,7 +7421,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-64]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-64]", "ldur s5, [x9, #-192]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6832,7 +7465,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-60]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + 
"bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-60]", "ldur s5, [x9, #-180]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6864,7 +7501,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-56]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-56]", "ldur s5, [x9, #-180]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6904,7 +7545,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-52]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-52]", "ldur s5, [x9, #-56]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6936,7 +7581,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-56]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-56]", "ldur s5, [x9, #-172]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6968,7 +7617,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-48]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-48]", "ldur s5, [x9, #-176]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7008,7 +7661,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-44]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-44]", "ldur s5, [x9, #-164]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7040,7 +7697,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-40]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-40]", "ldur s5, [x9, #-164]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7080,7 +7741,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-36]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + 
"orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x9, #-36]", "ldur s5, [x9, #-40]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7113,7 +7778,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-40]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-40]", "ldur s5, [x9, #-48]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7145,7 +7814,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-48]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-48]", "ldur s5, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7177,7 +7850,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-40]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-40]", "ldur s5, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7201,7 +7878,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-44]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-44]", "ldur s5, [x9, #-160]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7233,7 +7914,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-32]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-32]", "ldur s5, [x9, #-160]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7273,7 +7958,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-28]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-28]", "ldur s5, [x9, #-152]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7305,7 +7994,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-24]", + "fmov s7, w20", + 
"fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-24]", "ldur s5, [x9, #-148]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7345,7 +8038,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-20]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-20]", "ldur s5, [x9, #-24]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7377,7 +8074,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-24]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-24]", "ldur s5, [x9, #-144]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7409,7 +8110,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-16]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-16]", "ldr w4, [x9, #8]", "ldur s5, [x9, #-144]", "str x30, [sp, #-16]!", @@ -7451,7 +8156,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-12]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-12]", "ldur s5, [x9, #-136]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7483,7 +8192,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x9, #-8]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "stur s8, [x9, #-8]", "ldur s5, [x9, #-132]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7524,7 +8237,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-4]", + "fmov s5, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v7.8b, v3.8b, v5.8b", + "stur s7, [x9, #-4]", "ldur s3, [x9, #-8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7557,7 +8274,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, 
#-8]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-8]", "ldur s3, [x9, #-16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7589,7 +8310,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-16]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-16]", "ldur s3, [x9, #-12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7621,7 +8346,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-8]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-8]", "ldur s3, [x9, #-12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7645,25 +8374,65 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-12]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-12]", "ldur s3, [x9, #-128]", - "str s3, [x4, #1024]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #1024]", "ldur s3, [x9, #-112]", - "str s3, [x4, #768]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #768]", "ldur s3, [x9, #-120]", - "str s3, [x4, #512]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #512]", "ldur s3, [x9, #-104]", - "str s3, [x4, #256]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #256]", "ldur s3, [x9, #-124]", - "str s3, [x4]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4]", "ldur s3, [x9, #-124]", - "str s3, [x7]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, 
v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7]", "ldur s3, [x9, #-108]", - "str s3, [x7, #256]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #256]", "ldur s3, [x9, #-116]", - "str s3, [x7, #512]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #512]", "ldur s3, [x9, #-100]", - "str s3, [x7, #768]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #768]", "ldur s3, [x9, #-80]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7695,9 +8464,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-96]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-96]", "ldur s3, [x9, #-96]", - "str s3, [x4, #896]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #896]", "ldur s3, [x9, #-80]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7729,9 +8506,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-80]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-80]", "ldur s3, [x9, #-80]", - "str s3, [x4, #640]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #640]", "ldur s3, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7763,9 +8548,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-88]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-88]", "ldur s3, [x9, #-88]", - "str s3, [x4, #384]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #384]", 
"ldur s3, [x9, #-72]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7797,9 +8590,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-72]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-72]", "ldur s3, [x9, #-72]", - "str s3, [x4, #128]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x4, #128]", "ldur s3, [x9, #-76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7831,9 +8632,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-92]", "ldur s3, [x9, #-92]", - "str s3, [x7, #128]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #128]", "ldur s3, [x9, #-76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7865,9 +8674,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-76]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "stur s9, [x9, #-76]", "ldur s3, [x9, #-76]", - "str s3, [x7, #384]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #384]", "ldur s3, [x9, #-84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7891,11 +8708,23 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x9, #-84]", + "fmov s4, w20", + "fcmeq v8.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v8.8b, v3.8b, v4.8b", + "stur s8, [x9, #-84]", "ldur s3, [x9, #-84]", - "str s3, [x7, #640]", + "fmov s4, w20", + "fcmeq v8.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v8.8b, v3.8b, v4.8b", + "str s8, [x7, #640]", "strb wzr, [x28, #1049]", - "str s2, [x7, #896]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + 
"bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #896]", "ldur s2, [x9, #-32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7927,7 +8756,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldur s2, [x9, #-64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7959,7 +8792,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #960]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #960]", "ldur s2, [x9, #-48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7991,7 +8828,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #832]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #832]", "ldur s2, [x9, #-24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8023,7 +8864,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldur s2, [x9, #-48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8055,7 +8900,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #704]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #704]", "ldur s2, [x9, #-56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8087,7 +8936,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #576]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #576]", "ldur s2, [x9, #-24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8119,7 +8972,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, 
v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldur s2, [x9, #-56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8151,7 +9008,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #448]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #448]", "ldur s2, [x9, #-40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8183,7 +9044,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #320]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #320]", "ldur s2, [x9, #-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8215,7 +9080,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldur s2, [x9, #-40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8247,7 +9116,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #192]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #192]", "ldur s2, [x9, #-60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8279,7 +9152,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #64]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #64]", "ldur s2, [x9, #-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8311,7 +9188,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldur s2, [x9, #-60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8343,7 +9224,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #64]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + 
"orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #64]", "ldur s2, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8375,7 +9260,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #192]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #192]", "ldur s2, [x9, #-20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8407,7 +9296,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldur s2, [x9, #-44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8439,7 +9332,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #320]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #320]", "ldur s2, [x9, #-52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8471,7 +9368,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #448]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #448]", "ldur s2, [x9, #-20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8495,7 +9396,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldur s2, [x9, #-52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8527,7 +9432,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #576]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #576]", "ldur s2, [x9, #-20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8551,7 +9460,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #704]", + "fmov s3, w20", + "fcmeq v4.4s, 
v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #704]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -8568,8 +9481,16 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #832]", - "str s5, [x7, #960]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #832]", + "fmov s2, w20", + "fcmeq v3.4s, v5.4s, v5.4s", + "orr v2.8b, v5.8b, v2.8b", + "bsl v3.8b, v5.8b, v2.8b", + "str s3, [x7, #960]", "mov x8, x9", "ldp w9, w20, [x8], #8", "ldrb w21, [x28, #1051]", @@ -8582,7 +9503,7 @@ "strb w21, [x28, #1202]" ], "x86InstructionCount": 809, - "ExpectedInstructionCount": 7755 + "ExpectedInstructionCount": 8676 } } } diff --git a/unittests/InstructionCountCI/FlagM/x87-HalfLife.json b/unittests/InstructionCountCI/FlagM/x87-HalfLife.json index a41c2ad107..ce5aa06f59 100644 --- a/unittests/InstructionCountCI/FlagM/x87-HalfLife.json +++ b/unittests/InstructionCountCI/FlagM/x87-HalfLife.json @@ -14,7 +14,7 @@ "Instructions": { "Block1": { "x86InstructionCount": 70, - "ExpectedInstructionCount": 412, + "ExpectedInstructionCount": 429, "x86Insts": [ "sub esp,0x2c", "mov ecx,dword [esp + 0x34]", @@ -149,7 +149,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", + "mov w20, #0x400000", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", "ldr s7, [x7, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -181,7 +186,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #20]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #20]", "ldr s9, [x4]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -206,7 +215,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s10, w20", + "fcmeq v11.4s, v2.4s, v2.4s", + "orr v10.8b, v2.8b, 
v10.8b", + "bsl v11.8b, v2.8b, v10.8b", + "str s11, [x8, #24]", "ldr s2, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -231,7 +244,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #28]", + "fmov s10, w20", + "fcmeq v11.4s, v5.4s, v5.4s", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str s11, [x8, #28]", "ldr s5, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -762,7 +779,7 @@ }, "Block3": { "x86InstructionCount": 32, - "ExpectedInstructionCount": 231, + "ExpectedInstructionCount": 240, "x86Insts": [ "fld dword [ecx]", "fld dword [edx + 0x4]", @@ -831,7 +848,12 @@ "ldr x30, [sp], #16", "mov v5.16b, v0.16b", "ldr s6, [x7, #8]", - "str s6, [x8]", + "mov w20, #0x400000", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8]", "ldr s6, [x5, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -965,8 +987,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "mov w20, #0x1", - "strb w20, [x8]", + "mov w21, #0x1", + "strb w21, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v3.16b", @@ -999,14 +1021,14 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldrb w20, [x28, #1051]", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", - "add w21, w20, #0x1 (1)", + "ldrb w21, [x28, #1051]", + "add w21, w21, #0x7 (7)", "and w21, w21, #0x7", - "add x22, x28, x21, lsl #4", - "ldr q3, [x22, #1056]", - "add x20, x28, x20, lsl #4", + "add w22, w21, #0x1 (1)", + "and w22, w22, #0x7", + "add x23, x28, x22, lsl #4", + "ldr q3, [x23, #1056]", + "add x21, x28, x21, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -1022,18 +1044,22 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x10]", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #1051]", - "str q2, [x20, #1056]", - "str q3, [x22, #1056]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl 
v6.8b, v4.8b, v5.8b", + "str s6, [x10]", + "add w20, w22, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1051]", + "str q2, [x21, #1056]", + "str q3, [x23, #1056]", "strb wzr, [x28, #1202]" ] }, "Block4": { "x86InstructionCount": 54, - "ExpectedInstructionCount": 75, + "ExpectedInstructionCount": 85, "x86Insts": [ "push ebp", "push edi", @@ -1138,7 +1164,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1154,7 +1185,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov d2, d0", - "str d2, [x8]", + "mov x20, #0x8000000000000", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x8]", "mov w20, #0x44", "movk w20, #0x1, lsl #16", "str w20, [x8, #-4]!", @@ -1170,7 +1206,7 @@ }, "Block5": { "x86InstructionCount": 49, - "ExpectedInstructionCount": 300, + "ExpectedInstructionCount": 321, "x86Insts": [ "fld dword [esp + 0x80]", "fsub dword [esp + 0x7c]", @@ -1258,7 +1294,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #52]", + "mov w20, #0x400000", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #52]", "ldr s3, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -1299,7 +1340,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #44]", "ldr s2, [x4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1356,7 +1401,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #68]", "ldr s2, [x4, #4]", "str x30, 
[sp, #-16]!", "fmov s0, s2", @@ -1412,7 +1461,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #72]", "ldr s2, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1474,7 +1527,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #76]", "movi v2.2d, #0x0", "ldr s3, [x8, #40]", "str x30, [sp, #-16]!", @@ -1868,7 +1925,7 @@ }, "Block7": { "x86InstructionCount": 25, - "ExpectedInstructionCount": 244, + "ExpectedInstructionCount": 249, "x86Insts": [ "fld dword [ebx + 0x4]", "fld dword [ebx]", @@ -2131,7 +2188,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x11]", + "mov w22, #0x400000", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x11]", "strb w21, [x28, #1051]", "str q2, [x20, #1056]", "ldrb w20, [x28, #1202]", @@ -2145,7 +2207,7 @@ }, "Block8": { "x86InstructionCount": 25, - "ExpectedInstructionCount": 72, + "ExpectedInstructionCount": 90, "x86Insts": [ "fstp st0", "fstp st3", @@ -2204,7 +2266,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #56]", + "mov w12, #0x400000", + "fmov s6, w12", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #56]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -2213,11 +2280,15 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #44]", + "fmov s6, w12", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #44]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", - "add x12, x28, x20, lsl #4", - "ldr q5, [x12, #1056]", + "add x13, x28, x20, lsl #4", + "ldr q5, [x13, #1056]", "strb wzr, [x28, 
#1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -2226,7 +2297,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #40]", + "fmov s7, w12", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1640]", @@ -2234,23 +2309,28 @@ "blr x0", "ldr x30, [sp], #16", "fmov d6, d0", - "str d6, [x8]", + "mov x12, #0x8000000000000", + "fmov d7, x12", + "fcmeq d8, d6, d6", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str d8, [x8]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", - "mov w13, #0x24", - "movk w13, #0x1, lsl #16", - "str w13, [x8, #-4]!", + "mov w12, #0x24", + "movk w12, #0x1, lsl #16", + "str w12, [x8, #-4]!", "strb w20, [x28, #1051]", "str q3, [x21, #1056]", "str q4, [x22, #1056]", "str q5, [x23, #1056]", - "str q2, [x12, #1056]", + "str q2, [x13, #1056]", "strb wzr, [x28, #1202]" ] }, "Block9": { "x86InstructionCount": 25, - "ExpectedInstructionCount": 72, + "ExpectedInstructionCount": 90, "x86Insts": [ "fstp st0", "fstp st3", @@ -2309,7 +2389,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #56]", + "mov w12, #0x400000", + "fmov s6, w12", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #56]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -2318,11 +2403,15 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #44]", + "fmov s6, w12", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #44]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", - "add x12, x28, x20, lsl #4", - "ldr q5, [x12, #1056]", + "add x13, x28, x20, lsl #4", + "ldr q5, [x13, #1056]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -2331,7 +2420,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #40]", + "fmov s7, w12", + 
"fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1640]", @@ -2339,17 +2432,22 @@ "blr x0", "ldr x30, [sp], #16", "fmov d6, d0", - "str d6, [x8]", + "mov x12, #0x8000000000000", + "fmov d7, x12", + "fcmeq d8, d6, d6", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str d8, [x8]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", - "mov w13, #0x24", - "movk w13, #0x1, lsl #16", - "str w13, [x8, #-4]!", + "mov w12, #0x24", + "movk w12, #0x1, lsl #16", + "str w12, [x8, #-4]!", "strb w20, [x28, #1051]", "str q3, [x21, #1056]", "str q4, [x22, #1056]", "str q5, [x23, #1056]", - "str q2, [x12, #1056]", + "str q2, [x13, #1056]", "strb wzr, [x28, #1202]" ] }, diff --git a/unittests/InstructionCountCI/FlagM/x87-Oblivion.json b/unittests/InstructionCountCI/FlagM/x87-Oblivion.json index cd82aeedc1..5ddba9b4f4 100644 --- a/unittests/InstructionCountCI/FlagM/x87-Oblivion.json +++ b/unittests/InstructionCountCI/FlagM/x87-Oblivion.json @@ -14,7 +14,7 @@ "Instructions": { "Block1": { "x86InstructionCount": 911, - "ExpectedInstructionCount": 7343, + "ExpectedInstructionCount": 8277, "x86Insts": [ "sub esp,0x118", "fld dword [ecx + 0x1084]", @@ -961,7 +961,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8]", "ldr s2, [x7, #4224]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -993,7 +998,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #4]", "ldr s2, [x7, #4220]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1025,7 +1034,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr 
v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #8]", "ldr s2, [x7, #4216]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1057,7 +1070,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "ldr s2, [x7, #4212]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1089,7 +1106,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "ldr s2, [x7, #4208]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1121,7 +1142,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x7, #4204]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1153,7 +1178,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x7, #4200]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1185,7 +1214,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #28]", "ldr s2, [x7, #4196]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1217,7 +1250,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #32]", "ldr s2, [x7, #4192]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1249,7 +1286,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr 
v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #36]", "ldr s2, [x7, #4188]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1281,7 +1322,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #40]", "ldr s2, [x7, #4184]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1313,7 +1358,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x7, #4180]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1345,7 +1394,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x7, #4176]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1377,7 +1430,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #52]", "ldr s2, [x7, #4172]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1409,7 +1466,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #56]", "ldr s2, [x7, #4168]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1441,7 +1502,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #60]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1474,7 +1539,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #68]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, 
v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #68]", "ldr s4, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -1507,7 +1576,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #72]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -1540,7 +1613,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #76]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #76]", "ldr s8, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1572,7 +1649,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #80]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #80]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1604,7 +1685,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #84]", "ldr s8, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1636,7 +1721,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #88]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #88]", "ldr s8, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1668,7 +1757,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #92]", "ldr s8, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1700,7 +1793,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #96]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, 
v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #96]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -1710,9 +1807,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d0", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1d0", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1736,7 +1833,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #100]", + "fmov s3, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v8.8b, v2.8b, v3.8b", + "str s8, [x8, #100]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v4.16b", @@ -1745,9 +1846,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d4", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1d4", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1770,7 +1871,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #104]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #104]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -1779,9 +1884,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d8", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1d8", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1804,7 +1909,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #108]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1829,9 +1938,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1dc", - "movk w20, #0xb3, lsl #16", - "ldr s3, 
[x20]", + "mov w21, #0xc1dc", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1854,7 +1963,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #112]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #112]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1879,9 +1992,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e0", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1e0", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1904,7 +2017,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #116]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #116]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1929,9 +2046,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e4", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1e4", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -1954,7 +2071,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #120]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -1979,9 +2100,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e8", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1e8", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -2004,7 +2125,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #124]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, 
v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #124]", "ldr s2, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2029,9 +2154,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1ec", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1ec", + "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -2054,7 +2179,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #128]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #128]", "ldr s2, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2087,7 +2216,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8]", "ldr s4, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -2120,7 +2253,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #4]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #4]", "ldr s6, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -2153,7 +2290,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr s8, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2185,7 +2326,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -2195,9 +2340,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1f0", - "movk w20, #0xb3, lsl #16", - "ldr s3, [x20]", + "mov w21, #0xc1f0", 
+ "movk w21, #0xb3, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -2221,7 +2366,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #16]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -2231,9 +2380,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1f4", - "movk w20, #0xb3, lsl #16", - "ldr s4, [x20]", + "mov w21, #0xc1f4", + "movk w21, #0xb3, lsl #16", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -2257,7 +2406,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s5, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v8.8b, v2.8b, v5.8b", + "str s8, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -2266,9 +2419,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1f8", - "movk w20, #0xb3, lsl #16", - "ldr s5, [x20]", + "mov w21, #0xc1f8", + "movk w21, #0xb3, lsl #16", + "ldr s5, [x21]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -2292,7 +2445,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #24]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2317,9 +2474,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1fc", - "movk w20, #0xb3, lsl #16", - "ldr s6, [x20]", + "mov w21, #0xc1fc", + "movk w21, #0xb3, lsl #16", + "ldr s6, [x21]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -2343,7 +2500,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + 
"bsl v8.8b, v2.8b, v7.8b", + "str s8, [x8, #28]", "ldr s2, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2376,7 +2537,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #32]", "ldr s8, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2408,7 +2573,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #36]", "ldr s8, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2440,7 +2609,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #40]", "ldr s8, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2472,7 +2645,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v2.16b", @@ -2496,7 +2673,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x8, #48]", "ldr s2, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2536,7 +2717,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x8, #52]", "ldr s2, [x8, #108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2576,7 +2761,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, 
v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x8, #56]", "ldr s2, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2616,7 +2805,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #60]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2649,7 +2842,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #68]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #68]", "ldr s7, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2681,7 +2878,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", @@ -2691,9 +2892,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc200", - "movk w20, #0xb3, lsl #16", - "ldr s6, [x20]", + "mov w21, #0xc200", + "movk w21, #0xb3, lsl #16", + "ldr s6, [x21]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -2717,7 +2918,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #76]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2734,9 +2939,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc204", - "movk w20, #0xb3, lsl #16", - "ldr s7, [x20]", + "mov w21, #0xc204", + "movk w21, #0xb3, lsl #16", + "ldr s7, [x21]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -2760,7 +2965,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #80]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", 
+ "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #80]", "ldr s2, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2792,7 +3001,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #84]", "ldr s8, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2824,7 +3037,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #88]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #88]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2856,7 +3073,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #92]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2896,7 +3117,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #96]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #96]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -2928,7 +3153,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #100]", "ldr s8, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2960,7 +3189,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #104]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #104]", "ldr s8, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2992,7 +3225,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + 
"bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #108]", "ldr s2, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3032,7 +3269,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #112]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #112]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3064,7 +3305,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr s8, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -3096,7 +3341,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #120]", "ldr s8, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -3128,7 +3377,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #124]", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3168,7 +3421,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #128]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #128]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3200,7 +3457,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -3217,9 +3478,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc208", - "movk w20, #0xb3, lsl #16", - "ldr s8, [x20]", + "mov w21, #0xc208", + "movk w21, 
#0xb3, lsl #16", + "ldr s8, [x21]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -3243,7 +3504,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #4]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3275,7 +3540,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #8]", "ldr s2, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3315,7 +3584,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #12]", "ldr s2, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3347,7 +3620,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #16]", "ldr s2, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3387,7 +3664,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #20]", "ldr s2, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3419,7 +3700,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #24]", "ldr s2, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3459,7 +3744,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, 
v2.8b, v9.8b", + "str s10, [x8, #28]", "ldr s2, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3491,7 +3780,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #32]", "ldr s2, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3531,7 +3824,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #36]", "ldr s2, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3563,7 +3860,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #40]", "ldr s2, [x8, #108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3603,7 +3904,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #44]", "ldr s2, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3635,7 +3940,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #48]", "ldr s2, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3675,7 +3984,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #52]", "ldr s2, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3707,7 +4020,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + 
"bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #56]", "ldr s2, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3747,7 +4064,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #60]", "ldr s2, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3756,7 +4077,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v9.16b, v0.16b", - "str s2, [x8, #192]", + "fmov s10, w20", + "fcmeq v11.4s, v2.4s, v2.4s", + "orr v10.8b, v2.8b, v10.8b", + "bsl v11.8b, v2.8b, v10.8b", + "str s11, [x8, #192]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3780,7 +4105,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #160]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #160]", "ldr s2, [x8, #160]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3798,7 +4127,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #140]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #140]", "ldr s2, [x8, #140]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3830,7 +4163,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #220]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #220]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3880,7 +4217,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #252]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #252]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3889,7 +4230,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v9.16b, v0.16b", - "str s2, [x8, #200]", + "fmov s10, w20", + "fcmeq v11.4s, v2.4s, 
v2.4s", + "orr v10.8b, v2.8b, v10.8b", + "bsl v11.8b, v2.8b, v10.8b", + "str s11, [x8, #200]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3913,7 +4258,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #184]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #184]", "ldr s2, [x8, #184]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3945,7 +4294,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #168]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #168]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -3993,7 +4346,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #152]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #152]", "ldr s2, [x8, #152]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4011,7 +4368,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #132]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #132]", "ldr s2, [x8, #132]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4043,7 +4404,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #212]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #212]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4077,7 +4442,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #272]", + "mov x21, #0x8000000000000", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #272]", "ldr s9, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -4117,7 +4487,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, 
#64]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #64]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4149,7 +4523,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #228]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #228]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4197,7 +4575,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #260]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #260]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4229,7 +4611,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #244]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #244]", "ldr s2, [x8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4247,9 +4633,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #268]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #268]", "ldr s2, [x8, #4]", - "str s2, [x8, #144]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #144]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4258,7 +4652,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v9.16b, v0.16b", - "str s2, [x8, #176]", + "fmov s10, w20", + "fcmeq v11.4s, v2.4s, v2.4s", + "orr v10.8b, v2.8b, v10.8b", + "bsl v11.8b, v2.8b, v10.8b", + "str s11, [x8, #176]", "ldr q2, [x28, #3472]", "eor v2.16b, v9.16b, v2.16b", "ldr s9, [x8, #8]", @@ -4284,7 +4682,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #236]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, 
v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #236]", "ldr s2, [x7, #4104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4309,9 +4711,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc190", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc190", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4334,7 +4736,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8]", "ldr s2, [x7, #4108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4359,9 +4765,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc194", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc194", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4384,7 +4790,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #4]", "ldr s2, [x7, #4112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4409,9 +4819,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc198", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc198", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4434,7 +4844,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #8]", "ldr s2, [x7, #4116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4459,9 +4873,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc19c", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc19c", + 
"movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4484,7 +4898,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #12]", "ldr s2, [x7, #4120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4509,9 +4927,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1a0", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1a0", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4534,7 +4952,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #16]", "ldr s2, [x7, #4124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4559,9 +4981,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1a4", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1a4", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4584,7 +5006,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #20]", "ldr s2, [x7, #4128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4609,9 +5035,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1a8", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1a8", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4634,7 +5060,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl 
v10.8b, v2.8b, v9.8b", + "str s10, [x8, #24]", "ldr s2, [x7, #4132]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4659,9 +5089,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1ac", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1ac", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4684,7 +5114,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #28]", "ldr s2, [x7, #4136]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4709,9 +5143,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1b0", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1b0", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4734,7 +5168,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #32]", "ldr s2, [x7, #4140]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4759,9 +5197,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1b4", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1b4", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4784,7 +5222,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #36]", "ldr s2, [x7, #4144]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4809,9 +5251,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1b8", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1b8", + "movk w21, 
#0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4834,7 +5276,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #40]", "ldr s2, [x7, #4148]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4859,9 +5305,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1bc", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1bc", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4884,7 +5330,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #44]", "ldr s2, [x7, #4152]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4909,9 +5359,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1c0", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1c0", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4934,7 +5384,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #48]", "ldr s2, [x7, #4156]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -4959,9 +5413,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1c4", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1c4", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -4984,7 +5438,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, 
v9.8b", + "str s10, [x8, #52]", "ldr s2, [x7, #4160]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5009,9 +5467,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1c8", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1c8", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5034,7 +5492,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #56]", "ldr s2, [x7, #4164]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5059,9 +5521,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1cc", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1cc", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5084,7 +5546,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #60]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5116,7 +5582,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #68]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5148,7 +5618,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #72]", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5180,7 +5654,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, 
v9.8b", + "str s10, [x8, #76]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5212,7 +5690,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #80]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #80]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5244,7 +5726,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #84]", "ldr s2, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5276,7 +5762,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #88]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #88]", "ldr s2, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5308,7 +5798,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #92]", "ldr s2, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5340,7 +5834,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #96]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #96]", "ldr s2, [x8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5365,9 +5863,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d0", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1d0", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5390,7 +5888,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, 
v9.8b", + "str s10, [x8, #100]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5415,9 +5917,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d4", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1d4", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5440,7 +5942,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #104]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #104]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5465,9 +5971,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1d8", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1d8", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5490,7 +5996,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #108]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5515,9 +6025,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1dc", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1dc", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5540,7 +6050,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #112]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #112]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5565,9 +6079,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e0", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1e0", + "movk w21, #0xb3, lsl #16", + 
"ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5590,7 +6104,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #116]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5615,9 +6133,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e4", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1e4", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5640,7 +6158,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #120]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5665,9 +6187,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1e8", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1e8", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5690,7 +6212,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #124]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #124]", "ldr s2, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5715,9 +6241,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1ec", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1ec", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -5740,7 +6266,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #128]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str 
s10, [x8, #128]", "ldr s2, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5772,7 +6302,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8]", "ldr s2, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5804,7 +6338,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #4]", "ldr s2, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5836,7 +6374,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #8]", "ldr s2, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5868,7 +6410,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #12]", "ldr s2, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5908,7 +6454,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #16]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5948,7 +6498,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #20]", "ldr s2, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5988,7 +6542,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, 
#24]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6013,9 +6571,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xc1fc", - "movk w20, #0xb3, lsl #16", - "ldr s9, [x20]", + "mov w21, #0xc1fc", + "movk w21, #0xb3, lsl #16", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -6038,7 +6596,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #28]", "ldr s2, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6070,7 +6632,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #32]", "ldr s2, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6102,7 +6668,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #36]", "ldr s2, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6134,7 +6704,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #40]", "ldr s2, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6166,7 +6740,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v9.8b, v2.8b, v9.8b", + "bsl v10.8b, v2.8b, v9.8b", + "str s10, [x8, #44]", "ldr s2, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6207,7 +6785,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s3, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v9.8b, v2.8b, v3.8b", + "str s9, [x8, 
#48]", "ldr s2, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6248,7 +6830,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #52]", "ldr s2, [x8, #108]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6289,7 +6875,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #56]", "ldr s2, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6314,7 +6904,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x20]", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -6337,7 +6927,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #60]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6370,7 +6964,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #68]", + "fmov s5, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v9.8b, v4.8b, v5.8b", + "str s9, [x8, #68]", "ldr s4, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -6402,7 +7000,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v5.4s, v5.4s", + "orr v9.8b, v5.8b, v9.8b", + "bsl v10.8b, v5.8b, v9.8b", + "str s10, [x8, #72]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -6428,7 +7030,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "str s5, [x8, #76]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6460,7 +7066,11 @@ "blr x0", "ldr 
x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #80]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #80]", "ldr s2, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6493,7 +7103,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #84]", + "fmov s5, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v9.8b, v4.8b, v5.8b", + "str s9, [x8, #84]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -6525,7 +7139,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #88]", + "fmov s9, w20", + "fcmeq v10.4s, v5.4s, v5.4s", + "orr v9.8b, v5.8b, v9.8b", + "bsl v10.8b, v5.8b, v9.8b", + "str s10, [x8, #88]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -6551,7 +7169,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #92]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "str s5, [x8, #92]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6583,7 +7205,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #96]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #96]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6616,7 +7242,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #100]", + "fmov s5, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v9.8b, v4.8b, v5.8b", + "str s9, [x8, #100]", "ldr s4, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -6648,7 +7278,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #104]", + "fmov s9, w20", + "fcmeq v10.4s, v5.4s, v5.4s", + "orr v9.8b, v5.8b, v9.8b", + "bsl v10.8b, v5.8b, v9.8b", + "str s10, [x8, #104]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -6674,7 +7308,11 @@ 
"blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "str s5, [x8, #108]", "ldr s2, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6706,7 +7344,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #112]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #112]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6739,7 +7381,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #116]", + "fmov s5, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v9.8b, v4.8b, v5.8b", + "str s9, [x8, #116]", "ldr s4, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -6771,7 +7417,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v5.4s, v5.4s", + "orr v9.8b, v5.8b, v9.8b", + "bsl v10.8b, v5.8b, v9.8b", + "str s10, [x8, #120]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -6798,7 +7448,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #124]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "str s5, [x8, #124]", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6830,7 +7484,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #128]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #128]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6863,7 +7521,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -6887,7 
+7549,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #4]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6920,7 +7586,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -6944,7 +7614,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "ldr s2, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -6977,7 +7651,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #16]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -7001,7 +7679,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7034,7 +7716,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #24]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #24]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -7058,7 +7744,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #28]", "ldr s2, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ 
-7091,7 +7781,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #32]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #32]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -7115,7 +7809,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #36]", "ldr s2, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7148,7 +7846,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #40]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -7172,7 +7874,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7205,7 +7911,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #48]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -7229,7 +7939,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #52]", "ldr s2, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7262,7 +7976,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #56]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #56]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov 
v1.16b, v2.16b", @@ -7286,7 +8004,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #60]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -7319,7 +8041,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #196]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #196]", "ldr s5, [x8, #196]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7352,7 +8078,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #188]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #188]", "ldr s8, [x8, #188]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7401,7 +8131,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #164]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #164]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v7.16b", @@ -7433,7 +8167,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #180]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #180]", "ldr s8, [x8, #180]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7457,7 +8195,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #172]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #172]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v9.16b", @@ -7489,7 +8231,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #64]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #64]", 
"ldr s8, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7521,7 +8267,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #148]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #148]", "ldr s8, [x8, #148]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7539,7 +8289,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #272]", + "fmov s10, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str s11, [x8, #272]", "ldr s8, [x8, #272]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7572,7 +8326,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #208]", + "fmov s11, w20", + "fcmeq v12.4s, v8.4s, v8.4s", + "orr v11.8b, v8.8b, v11.8b", + "bsl v12.8b, v8.8b, v11.8b", + "str s12, [x8, #208]", "ldr s8, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7612,7 +8370,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #156]", + "fmov s11, w20", + "fcmeq v12.4s, v8.4s, v8.4s", + "orr v11.8b, v8.8b, v11.8b", + "bsl v12.8b, v8.8b, v11.8b", + "str s12, [x8, #156]", "ldr s8, [x8, #156]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7630,7 +8392,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #136]", + "fmov s11, w20", + "fcmeq v12.4s, v8.4s, v8.4s", + "orr v11.8b, v8.8b, v11.8b", + "bsl v12.8b, v8.8b, v11.8b", + "str s12, [x8, #136]", "ldr s8, [x8, #136]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7670,7 +8436,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #216]", + "fmov s11, w20", + "fcmeq v12.4s, v8.4s, v8.4s", + "orr v11.8b, v8.8b, v11.8b", + "bsl v12.8b, v8.8b, v11.8b", + "str s12, [x8, #216]", "ldr s8, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7713,7 +8483,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #64]", + "fmov s8, w20", + "fcmeq v11.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl 
v11.8b, v7.8b, v8.8b", + "str s11, [x8, #64]", "ldr s7, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -7769,7 +8543,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #232]", + "fmov s11, w20", + "fcmeq v12.4s, v9.4s, v9.4s", + "orr v11.8b, v9.8b, v11.8b", + "bsl v12.8b, v9.8b, v11.8b", + "str s12, [x8, #232]", "ldr s9, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -7818,7 +8596,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #224]", + "fmov s9, w20", + "fcmeq v11.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v11.8b, v8.8b, v9.8b", + "str s11, [x8, #224]", "ldr s8, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7874,7 +8656,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #240]", + "fmov s11, w20", + "fcmeq v12.4s, v9.4s, v9.4s", + "orr v11.8b, v9.8b, v11.8b", + "bsl v12.8b, v9.8b, v11.8b", + "str s12, [x8, #240]", "ldr s9, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -7918,7 +8704,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #64]", + "fmov s9, w20", + "fcmeq v11.4s, v4.4s, v4.4s", + "orr v9.8b, v4.8b, v9.8b", + "bsl v11.8b, v4.8b, v9.8b", + "str s11, [x8, #64]", "ldr s4, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -7943,7 +8733,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #248]", + "fmov s9, w20", + "fcmeq v11.4s, v7.4s, v7.4s", + "orr v9.8b, v7.8b, v9.8b", + "bsl v11.8b, v7.8b, v9.8b", + "str s11, [x8, #248]", "ldr s7, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -7986,7 +8780,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #64]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #64]", "ldr s7, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -8018,7 +8816,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #264]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, 
v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #264]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v4.16b", @@ -8035,17 +8837,41 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #256]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x8, #256]", "ldr s4, [x8, #144]", - "str s4, [x4]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4]", "ldr s4, [x8, #148]", - "str s4, [x4, #64]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #64]", "ldr s4, [x8, #152]", - "str s4, [x4, #128]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #128]", "ldr s4, [x8, #156]", - "str s4, [x4, #192]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #192]", "ldr s4, [x8, #160]", - "str s4, [x4, #256]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #256]", "ldr s4, [x8, #164]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8054,7 +8880,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "str s4, [x4, #320]", + "fmov s8, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v8.8b, v4.8b, v8.8b", + "bsl v9.8b, v4.8b, v8.8b", + "str s9, [x4, #320]", "ldr s4, [x8, #168]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8063,7 +8893,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "str s4, [x4, #384]", + "fmov s9, w20", + "fcmeq v10.4s, v4.4s, v4.4s", + "orr v9.8b, v4.8b, v9.8b", + "bsl v10.8b, v4.8b, v9.8b", + "str s10, [x4, #384]", "ldr s4, [x8, #172]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8072,7 +8906,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v9.16b, v0.16b", - "str s4, [x4, #448]", + "fmov s10, 
w20", + "fcmeq v11.4s, v4.4s, v4.4s", + "orr v10.8b, v4.8b, v10.8b", + "bsl v11.8b, v4.8b, v10.8b", + "str s11, [x4, #448]", "ldr s4, [x8, #176]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8081,9 +8919,17 @@ "blr x0", "ldr x30, [sp], #16", "mov v10.16b, v0.16b", - "str s4, [x4, #512]", + "fmov s11, w20", + "fcmeq v12.4s, v4.4s, v4.4s", + "orr v11.8b, v4.8b, v11.8b", + "bsl v12.8b, v4.8b, v11.8b", + "str s12, [x4, #512]", "ldr s4, [x8, #180]", - "str s4, [x4, #576]", + "fmov s11, w20", + "fcmeq v12.4s, v4.4s, v4.4s", + "orr v11.8b, v4.8b, v11.8b", + "bsl v12.8b, v4.8b, v11.8b", + "str s12, [x4, #576]", "ldr s4, [x8, #184]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8092,17 +8938,41 @@ "blr x0", "ldr x30, [sp], #16", "mov v11.16b, v0.16b", - "str s4, [x4, #640]", + "fmov s12, w20", + "fcmeq v13.4s, v4.4s, v4.4s", + "orr v12.8b, v4.8b, v12.8b", + "bsl v13.8b, v4.8b, v12.8b", + "str s13, [x4, #640]", "ldr s4, [x8, #188]", - "str s4, [x4, #704]", + "fmov s12, w20", + "fcmeq v13.4s, v4.4s, v4.4s", + "orr v12.8b, v4.8b, v12.8b", + "bsl v13.8b, v4.8b, v12.8b", + "str s13, [x4, #704]", "ldr s4, [x8, #192]", - "str s4, [x4, #768]", + "fmov s12, w20", + "fcmeq v13.4s, v4.4s, v4.4s", + "orr v12.8b, v4.8b, v12.8b", + "bsl v13.8b, v4.8b, v12.8b", + "str s13, [x4, #768]", "strb wzr, [x28, #1049]", - "str s5, [x4, #832]", + "fmov s4, w20", + "fcmeq v12.4s, v5.4s, v5.4s", + "orr v4.8b, v5.8b, v4.8b", + "bsl v12.8b, v5.8b, v4.8b", + "str s12, [x4, #832]", "ldr s4, [x8, #200]", - "str s4, [x4, #896]", + "fmov s5, w20", + "fcmeq v12.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v12.8b, v4.8b, v5.8b", + "str s12, [x4, #896]", "strb wzr, [x28, #1049]", - "str s2, [x4, #960]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #960]", "movi v2.2d, #0x0", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -8111,7 +8981,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1024]", + "fmov s4, 
w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #1024]", "ldr q2, [x28, #3472]", "eor v2.16b, v3.16b, v2.16b", "str x30, [sp, #-16]!", @@ -8121,7 +8995,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1088]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1088]", "ldr s2, [x8, #200]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8139,7 +9017,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1152]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1152]", "strb wzr, [x28, #1049]", "ldr q2, [x28, #3472]", "eor v2.16b, v6.16b, v2.16b", @@ -8150,7 +9032,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1216]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1216]", "ldr s2, [x8, #192]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8168,7 +9054,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1280]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1280]", "ldr s2, [x8, #188]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8186,7 +9076,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1344]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1344]", "strb wzr, [x28, #1049]", "ldr q2, [x28, #3472]", "eor v2.16b, v11.16b, v2.16b", @@ -8197,7 +9091,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1408]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1408]", "ldr s2, [x8, #180]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8215,7 +9113,11 @@ "blr x0", 
"ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1472]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1472]", "strb wzr, [x28, #1049]", "ldr q2, [x28, #3472]", "eor v2.16b, v10.16b, v2.16b", @@ -8226,7 +9128,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1536]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1536]", "strb wzr, [x28, #1049]", "ldr q2, [x28, #3472]", "eor v2.16b, v9.16b, v2.16b", @@ -8237,7 +9143,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1600]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1600]", "strb wzr, [x28, #1049]", "ldr q2, [x28, #3472]", "eor v2.16b, v8.16b, v2.16b", @@ -8248,7 +9158,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1664]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1664]", "ldr q2, [x28, #3472]", "eor v2.16b, v7.16b, v2.16b", "str x30, [sp, #-16]!", @@ -8258,15 +9172,35 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #1728]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1728]", "ldr s2, [x8, #140]", - "str s2, [x4, #1792]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1792]", "ldr s2, [x8, #136]", - "str s2, [x4, #1856]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1856]", "ldr s2, [x8, #132]", - "str s2, [x4, #1920]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1920]", "ldr s2, [x8, #272]", - "str s2, [x4, 
#1984]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #1984]", "ldr w20, [x7, #4096]", "eor x27, x20, x7", "subs w26, w20, w7", @@ -8276,7 +9210,7 @@ }, "Block2": { "x86InstructionCount": 630, - "ExpectedInstructionCount": 4711, + "ExpectedInstructionCount": 5333, "x86Insts": [ "mov eax,dword [ebp + 0x8]", "fld dword [eax + 0x40]", @@ -8942,7 +9876,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #68]", + "mov w20, #0x400000", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #68]", "ldr s3, [x4, #60]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8967,7 +9906,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #64]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #64]", "ldr s2, [x4, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8992,7 +9935,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #60]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #60]", "ldr s3, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9017,7 +9964,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #56]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #56]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9042,7 +9993,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #52]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #52]", "ldr s3, [x4, #44]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9067,7 +10022,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #48]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + 
"orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #48]", "ldr s2, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9092,7 +10051,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #44]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #44]", "ldr s3, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9117,7 +10080,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #40]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #40]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9142,7 +10109,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #36]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #36]", "ldr s3, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9167,7 +10138,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #32]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #32]", "ldr s2, [x4, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9192,7 +10167,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #28]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #28]", "ldr s3, [x4, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9217,7 +10196,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #24]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #24]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9242,7 +10225,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #20]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, 
v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #20]", "ldr s3, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9267,7 +10254,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #16]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #16]", "ldr s2, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9292,7 +10283,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #12]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #12]", "ldr s3, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9317,7 +10312,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #8]", "ldr s2, [x4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9333,7 +10332,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov d4, d0", - "str d4, [x8, #32]", + "mov x21, #0x8000000000000", + "fmov d5, x21", + "fcmeq d6, d4, d4", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str d6, [x8, #32]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -9350,7 +10354,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #8]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9359,7 +10367,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "str s3, [x4, #4]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x4, #4]", "ldr s3, [x4, #60]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9391,7 +10403,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4, #68]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + 
"orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4, #68]", "ldr s5, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9416,7 +10432,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #60]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x4, #60]", "ldr s3, [x4, #44]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9441,7 +10461,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4, #52]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4, #52]", "ldr s5, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9466,7 +10490,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #44]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x4, #44]", "ldr s3, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9491,7 +10519,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4, #36]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4, #36]", "ldr s5, [x4, #20]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9516,7 +10548,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #28]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x4, #28]", "ldr s3, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9541,7 +10577,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v3.16b", @@ -9557,9 +10597,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #4]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr 
v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #4]", "ldr s3, [x8, #4]", - "str s3, [x4, #12]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v2.16b", @@ -9575,7 +10623,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9591,7 +10643,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d3, d0", - "str d3, [x8, #24]", + "fmov d4, x21", + "fcmeq d5, d3, d3", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str d5, [x8, #24]", "ldr s3, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -9607,7 +10663,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d4, d0", - "str d4, [x8, #40]", + "fmov d5, x21", + "fcmeq d6, d4, d4", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str d6, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -9623,7 +10683,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9647,7 +10711,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d4, d0", - "str d4, [x8, #128]", + "fmov d5, x21", + "fcmeq d6, d4, d4", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str d6, [x8, #128]", "ldr s4, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -9664,9 +10732,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "mov w20, #0x7b70", - "movk w20, #0xa7, lsl #16", - "ldr d6, [x20]", + "mov w22, #0x7b70", + "movk w22, #0xa7, lsl #16", + "ldr d6, [x22]", "str x30, [sp, #-16]!", "fmov d0, d6", "ldr x0, [x28, #1608]", 
@@ -9691,9 +10759,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "mov w20, #0x7b68", - "movk w20, #0xa7, lsl #16", - "ldr d7, [x20]", + "mov w22, #0x7b68", + "movk w22, #0xa7, lsl #16", + "ldr d7, [x22]", "str x30, [sp, #-16]!", "fmov d0, d7", "ldr x0, [x28, #1608]", @@ -9718,9 +10786,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "mov w20, #0x7b60", - "movk w20, #0xa7, lsl #16", - "ldr d8, [x20]", + "mov w22, #0x7b60", + "movk w22, #0xa7, lsl #16", + "ldr d8, [x22]", "str x30, [sp, #-16]!", "fmov d0, d8", "ldr x0, [x28, #1608]", @@ -9752,7 +10820,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #192]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x8, #192]", "ldr d3, [x8, #40]", "str x30, [sp, #-16]!", "fmov d0, d3", @@ -9816,7 +10888,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #208]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x8, #208]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v8.16b", @@ -9880,7 +10956,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #184]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x8, #184]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v7.16b", @@ -9944,7 +11024,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #200]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x8, #200]", "ldr d3, [x8, #32]", "str x30, [sp, #-16]!", "fmov d0, d3", @@ -10002,7 +11086,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #128]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #128]", "ldr s2, [x4, #24]", "str x30, [sp, 
#-16]!", "fmov s0, s2", @@ -10011,9 +11099,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0x7b58", - "movk w20, #0xa7, lsl #16", - "ldr d3, [x20]", + "mov w22, #0x7b58", + "movk w22, #0xa7, lsl #16", + "ldr d3, [x22]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -10036,7 +11124,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10052,7 +11144,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d3, d0", - "str d3, [x8, #24]", + "fmov d4, x21", + "fcmeq d5, d3, d3", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str d5, [x8, #24]", "ldr s3, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -10068,7 +11164,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d4, d0", - "str d4, [x8, #32]", + "fmov d5, x21", + "fcmeq d9, d4, d4", + "orr v5.8b, v4.8b, v5.8b", + "bsl v9.8b, v4.8b, v5.8b", + "str d9, [x8, #32]", "ldr s4, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -10084,7 +11184,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #144]", + "fmov d9, x21", + "fcmeq d10, d5, d5", + "orr v9.8b, v5.8b, v9.8b", + "bsl v10.8b, v5.8b, v9.8b", + "str d10, [x8, #144]", "ldr s5, [x4, #56]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10100,10 +11204,14 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #40]", - "mov w21, #0x7b50", - "movk w21, #0xa7, lsl #16", - "ldr d9, [x21]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #40]", + "mov w23, #0x7b50", + "movk w23, #0xa7, lsl #16", + "ldr d9, [x23]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -10128,9 +11236,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w21, #0x7b48", - "movk w21, #0xa7, lsl #16", - "ldr d3, 
[x21]", + "mov w23, #0x7b48", + "movk w23, #0xa7, lsl #16", + "ldr d3, [x23]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -10155,9 +11263,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w21, #0x7b40", - "movk w21, #0xa7, lsl #16", - "ldr d4, [x21]", + "mov w23, #0x7b40", + "movk w23, #0xa7, lsl #16", + "ldr d4, [x23]", "str x30, [sp, #-16]!", "fmov d0, d4", "ldr x0, [x28, #1608]", @@ -10189,7 +11297,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #180]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #180]", "ldr d2, [x8, #24]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10231,7 +11343,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr d10, [x20]", + "ldr d10, [x22]", "str x30, [sp, #-16]!", "fmov d0, d10", "ldr x0, [x28, #1608]", @@ -10254,7 +11366,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #196]", + "fmov s10, w20", + "fcmeq v11.4s, v2.4s, v2.4s", + "orr v10.8b, v2.8b, v10.8b", + "bsl v11.8b, v2.8b, v10.8b", + "str s11, [x8, #196]", "ldr d2, [x8, #24]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10335,7 +11451,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #168]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #168]", "ldr d2, [x8, #24]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10423,7 +11543,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #144]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #144]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10439,7 +11563,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #8]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str d11, [x8, #8]", "str 
x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v2.16b", @@ -10455,7 +11583,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #20]", "ldr s2, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10471,7 +11603,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #152]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str d11, [x8, #152]", "ldr s5, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10487,7 +11623,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #160]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #160]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v5.16b", @@ -10503,7 +11643,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #20]", "ldr s2, [x4, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10519,7 +11663,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #32]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str d11, [x8, #32]", "ldr s5, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10535,7 +11683,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #136]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str d11, [x8, #136]", "ldr s5, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10551,7 +11703,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #24]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str 
d11, [x8, #24]", "ldr s5, [x4, #68]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10567,7 +11723,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d5, d0", - "str d5, [x8, #40]", + "fmov d10, x21", + "fcmeq d11, d5, d5", + "orr v10.8b, v5.8b, v10.8b", + "bsl v11.8b, v5.8b, v10.8b", + "str d11, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v6.16b", @@ -10647,7 +11807,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #176]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #176]", "ldr d2, [x8, #160]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10735,7 +11899,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #48]", "ldr d2, [x8, #32]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10823,7 +11991,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #160]", + "fmov s5, w20", + "fcmeq v10.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v10.8b, v2.8b, v5.8b", + "str s10, [x8, #160]", "ldr d2, [x8, #32]", "str x30, [sp, #-16]!", "fmov d0, d2", @@ -10914,7 +12086,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #40]", + "fmov s6, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v8.8b, v5.8b, v6.8b", + "str s8, [x8, #40]", "ldr d5, [x8, #8]", "str x30, [sp, #-16]!", "fmov d0, d5", @@ -10973,9 +12149,9 @@ "ldr x30, [sp], #16", "mov v2.16b, v0.16b", "strb wzr, [x28, #1049]", - "mov w21, #0x7bd8", - "movk w21, #0xa7, lsl #16", - "ldr d5, [x21]", + "mov w23, #0x7bd8", + "movk w23, #0xa7, lsl #16", + "ldr d5, [x23]", "str x30, [sp, #-16]!", "fmov d0, d5", "ldr x0, [x28, #1608]", @@ -10998,7 +12174,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl 
v6.8b, v2.8b, v5.8b", + "str s6, [x8, #24]", "ldr s2, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -11007,7 +12187,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr d5, [x20]", + "ldr d5, [x22]", "str x30, [sp, #-16]!", "fmov d0, d5", "ldr x0, [x28, #1608]", @@ -11030,7 +12210,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "str s6, [x8, #20]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -11118,7 +12302,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #20]", + "fmov s8, w20", + "fcmeq v10.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v10.8b, v7.8b, v8.8b", + "str s10, [x8, #20]", "ldr s7, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -11134,7 +12322,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d8, d0", - "str d8, [x8, #32]", + "fmov d10, x21", + "fcmeq d11, d8, d8", + "orr v10.8b, v8.8b, v10.8b", + "bsl v11.8b, v8.8b, v10.8b", + "str d11, [x8, #32]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -11151,7 +12343,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "ldr d8, [x20]", + "ldr d8, [x22]", "str x30, [sp, #-16]!", "fmov d0, d8", "ldr x0, [x28, #1608]", @@ -11174,7 +12366,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #152]", + "fmov s8, w20", + "fcmeq v10.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v10.8b, v7.8b, v8.8b", + "str s10, [x8, #152]", "ldr d7, [x8, #32]", "str x30, [sp, #-16]!", "fmov d0, d7", @@ -11238,7 +12434,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #136]", + "fmov s8, w20", + "fcmeq v10.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v10.8b, v7.8b, v8.8b", + "str s10, [x8, #136]", "ldr d7, [x8, #32]", "str x30, [sp, #-16]!", "fmov d0, d7", @@ -11303,7 +12503,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, 
#32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #32]", "ldr s2, [x8, #180]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -11335,7 +12539,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #8]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -11360,9 +12568,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "mov w20, #0x7b38", - "movk w20, #0xa7, lsl #16", - "ldr d7, [x20]", + "mov w22, #0x7b38", + "movk w22, #0xa7, lsl #16", + "ldr d7, [x22]", "str x30, [sp, #-16]!", "fmov d0, d7", "ldr x0, [x28, #1608]", @@ -11385,7 +12593,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #4]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #4]", "ldr s6, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11417,7 +12629,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #56]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -11433,7 +12649,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #124]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #124]", "ldr s6, [x8, #196]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11465,7 +12685,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr s8, [x8, #152]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11481,7 +12705,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, 
[x8, #208]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #208]", "ldr s9, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -11497,7 +12725,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #152]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #152]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v9.16b", @@ -11506,9 +12738,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "mov w20, #0x7bd0", - "movk w20, #0xa7, lsl #16", - "ldr d9, [x20]", + "mov w22, #0x7bd0", + "movk w22, #0xa7, lsl #16", + "ldr d9, [x22]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -11531,7 +12763,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11555,7 +12791,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #48]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -11571,7 +12811,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #60]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #60]", "ldr d9, [x8, #48]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -11595,7 +12839,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #120]", "ldr s8, [x8, #168]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11611,7 +12859,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov d9, d0", - "str d9, [x8, #168]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #168]", "ldr s9, [x8, #184]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -11627,7 +12879,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #184]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #184]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v9.16b", @@ -11643,7 +12899,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr s8, [x8, #136]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11659,7 +12919,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #136]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #136]", "ldr s9, [x8, #160]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -11675,7 +12939,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #160]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #160]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v9.16b", @@ -11684,9 +12952,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "mov w20, #0x7b30", - "movk w20, #0xa7, lsl #16", - "ldr d9, [x20]", + "mov w22, #0x7b30", + "movk w22, #0xa7, lsl #16", + "ldr d9, [x22]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -11709,7 +12977,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11733,7 +13005,11 @@ 
"blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #48]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -11749,7 +13025,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #64]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #64]", "ldr d9, [x8, #48]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -11773,7 +13053,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr s8, [x8, #144]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11789,7 +13073,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #144]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #144]", "ldr s9, [x8, #200]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -11805,7 +13093,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #200]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #200]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v9.16b", @@ -11821,7 +13113,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr s8, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11837,7 +13133,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #32]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #32]", "ldr s9, [x8, #40]", "str x30, [sp, #-16]!", "fmov 
s0, s9", @@ -11853,7 +13153,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #40]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v9.16b", @@ -11862,9 +13166,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "mov w20, #0x7b28", - "movk w20, #0xa7, lsl #16", - "ldr d9, [x20]", + "mov w22, #0x7b28", + "movk w22, #0xa7, lsl #16", + "ldr d9, [x22]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -11887,7 +13191,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11911,7 +13219,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #48]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -11927,7 +13239,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #68]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #68]", "ldr d9, [x8, #48]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -11951,7 +13267,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #112]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #112]", "ldr s8, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -11975,7 +13295,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #128]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #128]", "str x30, [sp, 
#-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -11991,7 +13315,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #72]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #72]", "ldr d9, [x8, #128]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -12015,7 +13343,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #108]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #108]", "ldr d8, [x8, #200]", "str x30, [sp, #-16]!", "fmov d0, d8", @@ -12047,7 +13379,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr d8, [x8, #40]", "str x30, [sp, #-16]!", "fmov d0, d8", @@ -12072,9 +13408,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "mov w20, #0x7b20", - "movk w20, #0xa7, lsl #16", - "ldr d9, [x20]", + "mov w22, #0x7b20", + "movk w22, #0xa7, lsl #16", + "ldr d9, [x22]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -12097,7 +13433,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -12121,7 +13461,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #48]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -12137,7 +13481,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #76]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + 
"str s11, [x8, #76]", "ldr d9, [x8, #48]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -12161,7 +13509,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #104]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #104]", "ldr d8, [x8, #184]", "str x30, [sp, #-16]!", "fmov d0, d8", @@ -12193,7 +13545,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr d8, [x8, #160]", "str x30, [sp, #-16]!", "fmov d0, d8", @@ -12218,9 +13574,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "mov w20, #0x7b18", - "movk w20, #0xa7, lsl #16", - "ldr d9, [x20]", + "mov w22, #0x7b18", + "movk w22, #0xa7, lsl #16", + "ldr d9, [x22]", "str x30, [sp, #-16]!", "fmov d0, d9", "ldr x0, [x28, #1608]", @@ -12243,7 +13599,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -12267,7 +13627,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d10, d0", - "str d10, [x8, #48]", + "fmov d11, x21", + "fcmeq d12, d10, d10", + "orr v11.8b, v10.8b, v11.8b", + "bsl v12.8b, v10.8b, v11.8b", + "str d12, [x8, #48]", "str x30, [sp, #-16]!", "mov v0.16b, v9.16b", "mov v1.16b, v8.16b", @@ -12283,7 +13647,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #80]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #80]", "ldr d9, [x8, #48]", "str x30, [sp, #-16]!", "fmov d0, d9", @@ -12307,7 +13675,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl 
v10.8b, v8.8b, v9.8b", + "str s10, [x8, #100]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -12323,7 +13695,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #8]", "ldr d6, [x8, #152]", "str x30, [sp, #-16]!", "fmov d0, d6", @@ -12348,9 +13724,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "mov w20, #0x7be0", - "movk w20, #0xa7, lsl #16", - "ldr d7, [x20]", + "mov w22, #0x7be0", + "movk w22, #0xa7, lsl #16", + "ldr d7, [x22]", "str x30, [sp, #-16]!", "fmov d0, d7", "ldr x0, [x28, #1608]", @@ -12373,7 +13749,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #4]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #4]", "ldr s6, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12405,7 +13785,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #84]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -12421,7 +13805,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #96]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #96]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -12439,7 +13827,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s3, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v6.8b, v2.8b, v3.8b", + "str s6, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v4.16b", @@ -12448,9 +13840,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0x7b10", - "movk w20, #0xa7, lsl #16", - "ldr d3, [x20]", + 
"mov w22, #0x7b10", + "movk w22, #0xa7, lsl #16", + "ldr d3, [x22]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -12473,7 +13865,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #4]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -12501,10 +13897,10 @@ "ldr x30, [sp], #16", "mov v4.16b, v0.16b", "lsl w4, w4, #4", - "mov w20, #0x83d0", - "movk w20, #0xb1, lsl #16", + "mov w22, #0x83d0", + "movk w22, #0xb1, lsl #16", "mvn w27, w4", - "adds w26, w4, w20", + "adds w26, w4, w22", "mov x4, x26", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -12513,7 +13909,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #88]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #88]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v2.16b", @@ -12529,7 +13929,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #92]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #92]", "ldr s2, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -12563,7 +13967,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7]", "ldr s3, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12598,7 +14006,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #4]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x7, #4]", "ldr s3, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12633,7 +14045,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #8]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, 
v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x7, #8]", "ldr s3, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12668,7 +14084,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #12]", + "fmov s7, w20", + "fcmeq v8.4s, v3.4s, v3.4s", + "orr v7.8b, v3.8b, v7.8b", + "bsl v8.8b, v3.8b, v7.8b", + "str s8, [x7, #12]", "ldr s3, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12703,7 +14123,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #16]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #16]", "ldr s3, [x4, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12738,7 +14162,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #20]", "ldr s3, [x4, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12772,7 +14200,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #24]", "ldr s3, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12806,7 +14238,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #28]", "ldr s3, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12840,7 +14276,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #32]", "ldr s3, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12872,7 +14312,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #36]", + "fmov s9, w20", + "fcmeq v10.4s, 
v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #36]", "ldr s3, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12904,7 +14348,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #40]", "ldr s3, [x4, #44]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12936,7 +14384,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v3.4s, v3.4s", + "orr v9.8b, v3.8b, v9.8b", + "bsl v10.8b, v3.8b, v9.8b", + "str s10, [x7, #44]", "ldr s3, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12960,7 +14412,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #48]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x7, #48]", "ldr s3, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12984,7 +14440,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #52]", + "fmov s7, w20", + "fcmeq v8.4s, v3.4s, v3.4s", + "orr v7.8b, v3.8b, v7.8b", + "bsl v8.8b, v3.8b, v7.8b", + "str s8, [x7, #52]", "ldr s3, [x4, #56]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13008,7 +14468,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #56]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x7, #56]", "ldr s3, [x4, #60]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13032,7 +14496,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #60]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x7, #60]", "ldr s3, [x4, #64]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13056,7 +14524,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #64]", + "fmov s4, w20", + "fcmeq 
v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #64]", "ldr s3, [x4, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13080,7 +14552,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #68]", "ldr s2, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -13112,7 +14588,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #72]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #72]", "ldr s3, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13144,7 +14624,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x7, #76]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x7, #76]", "ldr s4, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -13176,7 +14660,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x7, #80]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x7, #80]", "ldr s5, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -13208,7 +14696,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x7, #84]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x7, #84]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -13240,7 +14732,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x7, #88]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x7, #88]", "ldr s7, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -13272,7 +14768,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #92]", + "fmov s9, w20", + "fcmeq 
v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #92]", "ldr s8, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13288,7 +14788,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #24]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #24]", "ldr s9, [x4, #96]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -13312,7 +14816,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #96]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #96]", "ldr s8, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13328,7 +14836,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov d9, d0", - "str d9, [x8, #128]", + "fmov d10, x21", + "fcmeq d11, d9, d9", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str d11, [x8, #128]", "ldr s9, [x4, #100]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -13352,7 +14864,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #100]", "ldr s8, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13384,7 +14900,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x7, #104]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x7, #104]", "ldr s9, [x4, #108]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -13408,7 +14928,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #108]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #108]", "ldr s8, [x4, #112]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13440,7 +14964,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #112]", + 
"fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #112]", "ldr s8, [x4, #116]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13472,7 +15000,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x7, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x7, #116]", "ldr s8, [x4, #120]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -13496,7 +15028,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x7, #120]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x7, #120]", "ldr s7, [x4, #124]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -13520,7 +15056,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x7, #124]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x7, #124]", "ldr s6, [x4, #128]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -13544,7 +15084,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x7, #128]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x7, #128]", "ldr s5, [x4, #132]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -13568,7 +15112,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x7, #132]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x7, #132]", "ldr s4, [x4, #136]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -13592,7 +15140,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #136]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #136]", "ldr s3, [x4, #140]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13616,7 +15168,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - 
"str s2, [x7, #140]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7, #140]", "mov x8, x9", "ldr w9, [x8], #4", "cfinv", @@ -13625,7 +15181,7 @@ }, "Block3": { "x86InstructionCount": 649, - "ExpectedInstructionCount": 3256, + "ExpectedInstructionCount": 4145, "x86Insts": [ "fld dword [esi + 0x64]", "mov eax,dword [esi + 0x88]", @@ -14280,29 +15836,58 @@ "ExpectedArm64ASM": [ "ldr s2, [x10, #100]", "ldr w4, [x10, #136]", - "str s2, [x8, #92]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #92]", "ldr w7, [x10, #140]", "ldr s2, [x10, #112]", "ldr w5, [x10, #144]", - "str s2, [x8, #96]", - "mov w20, #0x3f", - "str w20, [x8, #740]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #96]", + "mov w21, #0x3f", + "str w21, [x8, #740]", "ldr s2, [x10, #124]", "str w4, [x8, #148]", - "str s2, [x8, #100]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #100]", "str w7, [x8, #152]", "ldr s2, [x10, #104]", "str w5, [x8, #156]", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "str w4, [x8, #232]", "ldr s2, [x10, #116]", "str w7, [x8, #236]", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "str w5, [x8, #240]", "ldr s2, [x10, #128]", - "str s2, [x8, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #28]", "ldr s2, [x10, #244]", - "str s2, [x8, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, 
#40]", "ldr s2, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -14334,7 +15919,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #140]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #140]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14358,7 +15947,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #124]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #124]", "ldr s3, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14382,15 +15975,35 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #132]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #132]", "ldr s2, [x10, #108]", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x10, #120]", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x10, #132]", - "str s2, [x8, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #28]", "ldr s2, [x10, #240]", - "str s2, [x8, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #40]", "ldr s2, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -14422,7 +16035,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #84]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #84]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14446,7 +16063,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str 
s3, [x8, #80]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #80]", "ldr s3, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14470,9 +16091,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #88]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #88]", "ldr s2, [x10, #256]", - "str s2, [x8, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #40]", "ldr s2, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -14504,9 +16133,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #40]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #40]", "ldr s3, [x8, #40]", - "str s3, [x8, #16]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #16]", "ldr s4, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -14530,9 +16167,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #40]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #40]", "ldr s4, [x8, #40]", - "str s4, [x8, #44]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #44]", "ldr s5, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14556,9 +16201,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #40]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #40]", "ldr s5, [x8, #40]", - "str s5, [x8, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "ldr s6, [x8, 
#140]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14582,9 +16235,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #76]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #76]", "ldr s6, [x8, #76]", - "str s6, [x8, #68]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #68]", "ldr s6, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14608,9 +16269,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #140]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #140]", "ldr s6, [x8, #140]", - "str s6, [x8, #72]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x8, #132]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14634,9 +16303,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #88]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #88]", "ldr s6, [x8, #88]", - "str s6, [x8, #64]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #64]", "ldr s6, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14660,9 +16337,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #80]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #80]", "ldr s6, [x8, #80]", - "str s6, [x8, #132]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #132]", "ldr s6, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14686,9 +16371,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #84]", + "fmov s7, 
w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #84]", "ldr s6, [x8, #84]", - "str s6, [x8, #124]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #124]", "ldr s6, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14713,9 +16406,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #40]", "ldr s2, [x8, #40]", - "str s2, [x8, #92]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #92]", "ldr s2, [x8, #148]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -14747,7 +16448,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #132]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #132]", "ldr s6, [x8, #152]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14779,7 +16484,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #124]", "ldr s7, [x8, #156]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -14811,7 +16520,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #92]", "ldr s8, [x8, #132]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -14843,7 +16556,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #68]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #68]", "ldr s8, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -14875,7 +16592,11 @@ "blr 
x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "ldr s8, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -14907,7 +16628,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldr s8, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -14939,7 +16664,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -14971,7 +16700,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15003,37 +16736,89 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #16]", - "str s8, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #92]", "ldr w4, [x8, #92]", "ldr s8, [x8, #44]", "str w4, [x8, #244]", - "str s8, [x8, #96]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #96]", "ldr w7, [x8, #96]", "ldr s8, [x8, #20]", "str w7, [x8, #248]", - "str s8, [x8, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #100]", "ldr w5, [x8, #100]", "strb 
wzr, [x28, #1049]", "str w5, [x8, #252]", - "str s3, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #92]", "strb wzr, [x28, #1049]", - "str s4, [x8, #132]", + "fmov s8, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v8.8b, v4.8b, v8.8b", + "bsl v9.8b, v4.8b, v8.8b", + "str s9, [x8, #132]", "strb wzr, [x28, #1049]", - "str s5, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v5.4s, v5.4s", + "orr v8.8b, v5.8b, v8.8b", + "bsl v9.8b, v5.8b, v8.8b", + "str s9, [x8, #124]", "ldr s8, [x8, #76]", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldr s8, [x8, #140]", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "ldr s8, [x8, #88]", - "str s8, [x8, #68]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #68]", "ldr s8, [x8, #80]", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #84]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #40]", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15057,7 +16842,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15081,7 +16870,11 @@ "blr 
x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15105,7 +16898,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15137,7 +16934,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15169,7 +16970,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15201,7 +17006,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15233,7 +17042,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15265,7 +17078,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15297,37 
+17114,89 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr w4, [x8, #20]", "ldr s8, [x8, #44]", "str w4, [x8, #256]", - "str s8, [x8, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #24]", "ldr w7, [x8, #24]", "ldr s8, [x8, #16]", "str w7, [x8, #260]", - "str s8, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #28]", "ldr w5, [x8, #28]", "strb wzr, [x28, #1049]", "str w5, [x8, #264]", - "str s3, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #92]", "strb wzr, [x28, #1049]", - "str s4, [x8, #132]", + "fmov s8, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v8.8b, v4.8b, v8.8b", + "bsl v9.8b, v4.8b, v8.8b", + "str s9, [x8, #132]", "strb wzr, [x28, #1049]", - "str s5, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v5.4s, v5.4s", + "orr v8.8b, v5.8b, v8.8b", + "bsl v9.8b, v5.8b, v8.8b", + "str s9, [x8, #124]", "ldr s8, [x8, #76]", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldr s8, [x8, #140]", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "ldr s8, [x8, #88]", - "str s8, [x8, #68]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #68]", "ldr s8, [x8, #80]", - "str s8, [x8, #20]", + 
"fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #84]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #40]", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15351,7 +17220,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15375,7 +17248,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15399,7 +17276,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15431,7 +17312,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15463,7 +17348,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15495,7 
+17384,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15527,7 +17420,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15559,7 +17456,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -15591,37 +17492,89 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr w4, [x8, #20]", "ldr s8, [x8, #44]", "str w4, [x8, #268]", - "str s8, [x8, #24]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #24]", "ldr w7, [x8, #24]", "ldr s8, [x8, #16]", "str w7, [x8, #272]", - "str s8, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #28]", "ldr w5, [x8, #28]", "strb wzr, [x28, #1049]", "str w5, [x8, #276]", - "str s3, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #92]", "strb wzr, [x28, #1049]", - "str s4, [x8, #132]", + "fmov s3, w20", + "fcmeq 
v8.4s, v4.4s, v4.4s", + "orr v3.8b, v4.8b, v3.8b", + "bsl v8.8b, v4.8b, v3.8b", + "str s8, [x8, #132]", "strb wzr, [x28, #1049]", - "str s5, [x8, #124]", + "fmov s3, w20", + "fcmeq v4.4s, v5.4s, v5.4s", + "orr v3.8b, v5.8b, v3.8b", + "bsl v4.8b, v5.8b, v3.8b", + "str s4, [x8, #124]", "ldr s3, [x8, #76]", - "str s3, [x8, #64]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #64]", "ldr s3, [x8, #140]", - "str s3, [x8, #72]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #72]", "ldr s3, [x8, #88]", - "str s3, [x8, #68]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #68]", "ldr s3, [x8, #80]", - "str s3, [x8, #20]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #20]", "ldr s3, [x8, #84]", - "str s3, [x8, #44]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #44]", "ldr s3, [x8, #40]", - "str s3, [x8, #16]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #16]", "ldr s3, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15646,7 +17599,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15671,7 +17628,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15695,7 +17656,11 @@ "blr x0", "ldr 
x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15727,7 +17692,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15759,7 +17728,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15791,7 +17764,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15823,7 +17800,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15855,7 +17836,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15887,42 +17872,86 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "ldr s2, [x8, #20]", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr 
v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr w4, [x8, #20]", "ldr s2, [x8, #44]", "str w4, [x8, #280]", "ldr w4, [x6, #136]", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x8, #16]", "ldr w7, [x8, #24]", - "str s2, [x8, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #28]", "ldr w5, [x8, #28]", "ldr s2, [x6, #100]", "str w7, [x8, #284]", "ldr w7, [x6, #140]", - "str s2, [x8, #112]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #112]", "ldr s2, [x6, #112]", "str w5, [x8, #288]", "ldr w5, [x6, #144]", - "str s2, [x8, #116]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #116]", "ldr s2, [x6, #124]", "str w4, [x8, #148]", "str w7, [x8, #152]", "str w5, [x8, #156]", - "str s2, [x8, #120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #120]", "str w4, [x8, #172]", "ldr s2, [x6, #104]", "str w7, [x8, #176]", - "str s2, [x8, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "str w5, [x8, #180]", "ldr s2, [x6, #116]", - "str s2, [x8, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x6, #128]", - "str s2, [x8, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #52]", "ldr s2, [x6, #244]", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, 
#20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -15954,7 +17983,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #64]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #64]", "ldr s3, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15978,7 +18011,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #72]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #72]", "ldr s3, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16002,15 +18039,35 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #68]", "ldr s2, [x6, #108]", - "str s2, [x8, #92]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #92]", "ldr s2, [x6, #120]", - "str s2, [x8, #96]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #96]", "ldr s2, [x6, #132]", - "str s2, [x8, #100]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #100]", "ldr s2, [x6, #240]", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -16042,7 +18099,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #20]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #20]", "ldr s3, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16066,7 +18127,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #44]", + "fmov s4, 
w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #44]", "ldr s3, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16090,9 +18155,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "ldr s2, [x6, #256]", - "str s2, [x8, #40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #40]", "ldr s2, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -16124,9 +18197,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #20]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #20]", "ldr s3, [x8, #20]", - "str s3, [x8, #92]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #92]", "ldr s4, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -16150,9 +18231,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #20]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #20]", "ldr s4, [x8, #20]", - "str s4, [x8, #132]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #132]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -16176,9 +18265,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "ldr s5, [x8, #20]", - "str s5, [x8, #124]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #124]", "ldr s6, [x8, #64]", "str x30, [sp, 
#-16]!", "fmov s0, s6", @@ -16202,9 +18299,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #40]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #40]", "ldr s6, [x8, #40]", - "str s6, [x8, #64]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #64]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16228,9 +18333,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #84]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #84]", "ldr s6, [x8, #84]", - "str s6, [x8, #72]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16254,9 +18367,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #80]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #80]", "ldr s6, [x8, #80]", - "str s6, [x8, #68]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #68]", "ldr s6, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16280,9 +18401,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #88]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #88]", "ldr s6, [x8, #88]", - "str s6, [x8, #20]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", "ldr s6, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16306,9 +18435,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #140]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, 
v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #140]", "ldr s6, [x8, #140]", - "str s6, [x8, #44]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #44]", "ldr s6, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16325,7 +18462,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0x0", + "mov w21, #0x0", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -16334,9 +18471,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #76]", "ldr s2, [x8, #76]", - "str s2, [x8, #16]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #16]", "ldr s2, [x8, #148]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -16368,7 +18513,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", "ldr s6, [x8, #152]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -16400,7 +18549,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #44]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #44]", "ldr s7, [x8, #156]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -16432,7 +18585,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16464,7 +18621,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, 
v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16496,7 +18657,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16528,7 +18693,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16560,7 +18729,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16592,7 +18765,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16624,37 +18801,89 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", - "str s8, [x8, #112]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #112]", "ldr w4, [x8, #112]", "ldr s8, [x8, #44]", "str w4, [x8, #184]", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr w7, [x8, #116]", "ldr s8, [x8, #16]", "str w7, [x8, #188]", - "str s8, [x8, #120]", + 
"fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #120]", "ldr w5, [x8, #120]", "strb wzr, [x28, #1049]", "str w5, [x8, #192]", - "str s3, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #92]", "strb wzr, [x28, #1049]", - "str s4, [x8, #132]", + "fmov s8, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v8.8b, v4.8b, v8.8b", + "bsl v9.8b, v4.8b, v8.8b", + "str s9, [x8, #132]", "strb wzr, [x28, #1049]", - "str s5, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v5.4s, v5.4s", + "orr v8.8b, v5.8b, v8.8b", + "bsl v9.8b, v5.8b, v8.8b", + "str s9, [x8, #124]", "ldr s8, [x8, #40]", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldr s8, [x8, #84]", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "ldr s8, [x8, #80]", - "str s8, [x8, #68]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #68]", "ldr s8, [x8, #88]", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #140]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #76]", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16678,7 +18907,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, 
v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16702,7 +18935,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16726,7 +18963,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16758,7 +18999,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16790,7 +19035,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16822,7 +19071,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16854,7 +19107,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16886,7 +19143,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, 
v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16918,37 +19179,89 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", - "str s8, [x8, #112]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #112]", "ldr w4, [x8, #112]", "ldr s8, [x8, #44]", "str w4, [x8, #196]", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr w7, [x8, #116]", "ldr s8, [x8, #16]", "str w7, [x8, #200]", - "str s8, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #120]", "ldr w5, [x8, #120]", "strb wzr, [x28, #1049]", "str w5, [x8, #204]", - "str s3, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #92]", "strb wzr, [x28, #1049]", - "str s4, [x8, #132]", + "fmov s8, w20", + "fcmeq v9.4s, v4.4s, v4.4s", + "orr v8.8b, v4.8b, v8.8b", + "bsl v9.8b, v4.8b, v8.8b", + "str s9, [x8, #132]", "strb wzr, [x28, #1049]", - "str s5, [x8, #124]", + "fmov s8, w20", + "fcmeq v9.4s, v5.4s, v5.4s", + "orr v8.8b, v5.8b, v8.8b", + "bsl v9.8b, v5.8b, v8.8b", + "str s9, [x8, #124]", "ldr s8, [x8, #40]", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldr s8, [x8, #84]", - "str s8, [x8, #72]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #72]", "ldr s8, [x8, #80]", - "str s8, [x8, #68]", + "fmov s9, 
w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #68]", "ldr s8, [x8, #88]", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #140]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #76]", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16972,7 +19285,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -16996,7 +19313,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17020,7 +19341,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17052,7 +19377,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17084,7 +19413,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, 
v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17116,7 +19449,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17148,7 +19485,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #20]", "ldr s8, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17180,7 +19521,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -17212,38 +19557,90 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x8, #20]", - "str s8, [x8, #112]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #112]", "ldr w4, [x8, #112]", "ldr s8, [x8, #44]", "str w4, [x8, #208]", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr w7, [x8, #116]", "ldr s8, [x8, #16]", "str w7, [x8, #212]", - "str s8, [x8, #120]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #120]", "ldr w5, [x8, #120]", "str w5, [x8, #216]", "strb wzr, [x28, #1049]", - "str w20, [x8, #-4]!", - "str s3, [x8, #96]", + 
"str w21, [x8, #-4]!", + "fmov s8, w20", + "fcmeq v9.4s, v3.4s, v3.4s", + "orr v8.8b, v3.8b, v8.8b", + "bsl v9.8b, v3.8b, v8.8b", + "str s9, [x8, #96]", "strb wzr, [x28, #1049]", - "str s4, [x8, #136]", + "fmov s3, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v3.8b, v4.8b, v3.8b", + "bsl v8.8b, v4.8b, v3.8b", + "str s8, [x8, #136]", "strb wzr, [x28, #1049]", - "str s5, [x8, #128]", + "fmov s3, w20", + "fcmeq v4.4s, v5.4s, v5.4s", + "orr v3.8b, v5.8b, v3.8b", + "bsl v4.8b, v5.8b, v3.8b", + "str s4, [x8, #128]", "ldr s3, [x8, #44]", - "str s3, [x8, #68]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #68]", "ldr s3, [x8, #88]", - "str s3, [x8, #76]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #76]", "ldr s3, [x8, #84]", - "str s3, [x8, #72]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #72]", "ldr s3, [x8, #92]", - "str s3, [x8, #24]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #24]", "ldr s3, [x8, #144]", - "str s3, [x8, #48]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #48]", "ldr s3, [x8, #80]", - "str s3, [x8, #20]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #20]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -17268,7 +19665,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17293,7 +19694,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", 
+ "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17317,7 +19722,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17349,7 +19758,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17381,7 +19794,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17413,7 +19830,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17445,7 +19866,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #24]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17477,7 +19902,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -17509,17 +19938,33 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + 
"fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #20]", "ldr s2, [x8, #24]", - "str s2, [x8, #116]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #116]", "ldr w4, [x8, #116]", "ldr s2, [x8, #48]", "str w4, [x8, #224]", - "str s2, [x8, #120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #120]", "ldr w7, [x8, #120]", "ldr s2, [x8, #20]", "str w7, [x8, #228]", - "str s2, [x8, #124]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #124]", "ldr w5, [x8, #124]", "add w7, w8, #0x190 (400)", "str w5, [x8, #232]", @@ -17538,7 +19983,7 @@ }, "Block4": { "x86InstructionCount": 2050, - "ExpectedInstructionCount": 36, + "ExpectedInstructionCount": 41, "x86Insts": [ "fldz", "push 0x0", @@ -19609,7 +22054,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8]", + "mov w21, #0x400000", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8]", "str w20, [x8, #-4]!", "mov w20, #0x3c14", "movk w20, #0xa3, lsl #16", @@ -19632,7 +22082,7 @@ }, "Block5": { "x86InstructionCount": 368, - "ExpectedInstructionCount": 128, + "ExpectedInstructionCount": 157, "x86Insts": [ "mov ebx,dword [eax + 0x68]", "fld dword [esi + 0x2c]", @@ -20010,7 +22460,12 @@ "mvn w27, w8", "subs w26, w8, #0x14 (20)", "mov x8, x26", - "str s2, [x8, #16]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "uxtb w7, w4", "ldr q2, [x28, #3248]", "str w7, [x8, #56]", @@ -20021,7 +22476,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + 
"bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "uxtb w5, w6", "movi v2.2d, #0x0", "str x30, [sp, #-16]!", @@ -20031,33 +22490,37 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", - "ldr w20, [x8, #56]", - "sxtw x20, w20", - "mrs x21, nzcv", - "mov w22, #0x0", - "cmp x20, #0x0 (0)", - "mov w23, #0x8000", - "csel x12, x23, x22, lt", - "cneg x20, x20, mi", - "mov w13, #0x3f", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #8]", + "ldr w21, [x8, #56]", + "sxtw x21, w21", + "mrs x22, nzcv", + "mov w23, #0x0", + "cmp x21, #0x0 (0)", + "mov w12, #0x8000", + "csel x13, x12, x23, lt", + "cneg x21, x21, mi", + "mov w14, #0x3f", "mov x0, #0x3f", - "clz x14, x20", - "sub x14, x0, x14", - "sub x14, x13, x14", - "lsl x15, x20, x14", - "mov w16, #0x403e", - "sub x14, x16, x14", - "cmp x20, #0x0 (0)", - "csel x20, x22, x14, eq", - "orr x20, x12, x20", - "fmov d2, x15", - "fmov v2.D[1], x20", - "msr nzcv, x21", + "clz x15, x21", + "sub x15, x0, x15", + "sub x15, x14, x15", + "lsl x16, x21, x15", + "mov w17, #0x403e", + "sub x15, x17, x15", + "cmp x21, #0x0 (0)", + "csel x21, x23, x15, eq", + "orr x21, x13, x21", + "fmov d2, x16", + "fmov v2.D[1], x21", + "msr nzcv, x22", "str w4, [x8, #64]", - "mov w20, #0xddd8", - "movk w20, #0xa3, lsl #16", - "ldr d3, [x20]", + "mov w21, #0xddd8", + "movk w21, #0xa3, lsl #16", + "ldr d3, [x21]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -20081,28 +22544,36 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #56]", "ldr s2, [x8, #56]", - "str s2, [x8, #4]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #4]", "str w5, [x8, #56]", - "ldr w20, [x8, #56]", - "sxtw x20, w20", - "mrs x21, nzcv", - "cmp x20, #0x0 
(0)", - "csel x23, x23, x22, lt", - "cneg x20, x20, mi", + "ldr w21, [x8, #56]", + "sxtw x21, w21", + "mrs x22, nzcv", + "cmp x21, #0x0 (0)", + "csel x12, x12, x23, lt", + "cneg x21, x21, mi", "mov x0, #0x3f", - "clz x12, x20", - "sub x12, x0, x12", - "sub x12, x13, x12", - "lsl x13, x20, x12", - "sub x12, x16, x12", - "cmp x20, #0x0 (0)", - "csel x20, x22, x12, eq", - "orr x20, x23, x20", - "fmov d2, x13", - "fmov v2.D[1], x20", - "msr nzcv, x21", + "clz x13, x21", + "sub x13, x0, x13", + "sub x13, x14, x13", + "lsl x14, x21, x13", + "sub x13, x17, x13", + "cmp x21, #0x0 (0)", + "csel x21, x23, x13, eq", + "orr x21, x12, x21", + "fmov d2, x14", + "fmov v2.D[1], x21", + "msr nzcv, x22", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -20118,9 +22589,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #56]", "ldr s2, [x8, #56]", - "str s2, [x8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8]", "mov w20, #0x5e", "movk w20, #0x1, lsl #16", "str w20, [x8, #-4]!", @@ -20136,7 +22615,7 @@ }, "Block6": { "x86InstructionCount": 315, - "ExpectedInstructionCount": 32, + "ExpectedInstructionCount": 37, "x86Insts": [ "mov eax,dword [esp + 0x110]", "fldz", @@ -20475,7 +22954,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8]", "mov w20, #0x31", "movk w20, #0x1, lsl #16", "str w20, [x8, #-4]!", @@ -20491,7 +22975,7 @@ }, "Block7": { "x86InstructionCount": 214, - "ExpectedInstructionCount": 1743, + "ExpectedInstructionCount": 2000, "x86Insts": [ "fld dword [ecx + 0xc]", "fld dword [ecx + 0x18]", @@ -20740,7 +23224,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str 
s3, [x7, #24]", + "mov w20, #0x400000", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #24]", "ldr s3, [x7]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20765,7 +23254,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7, #12]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x7, #12]", "ldur s2, [x7, #-12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -20790,7 +23283,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7]", "ldur s3, [x7, #-24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20815,7 +23312,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x7, #-12]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "stur s5, [x7, #-12]", "ldur s2, [x7, #-36]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -20840,7 +23341,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #8]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20849,7 +23354,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "stur s3, [x7, #-24]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "stur s6, [x7, #-24]", "ldr s3, [x7]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20881,7 +23390,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x7, #24]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x7, #24]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v4.16b", @@ -20897,7 +23410,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x8, #8]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20906,7 +23423,11 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "str s3, [x7]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x7]", "ldur s3, [x7, #-12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20915,14 +23436,14 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldrb w20, [x28, #1051]", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", - "add w21, w20, #0x6 (6)", + "ldrb w21, [x28, #1051]", + "add w21, w21, #0x4 (4)", "and w21, w21, #0x7", - "add x21, x28, x21, lsl #4", - "ldr q6, [x21, #1056]", - "add x21, x28, x20, lsl #4", + "add w22, w21, #0x6 (6)", + "and w22, w22, #0x7", + "add x22, x28, x22, lsl #4", + "ldr q6, [x22, #1056]", + "add x22, x28, x21, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v6.16b", @@ -20938,9 +23459,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #4]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", + "fmov s7, w20", + "fcmeq v8.4s, v3.4s, v3.4s", + "orr v7.8b, v3.8b, v7.8b", + "bsl v8.8b, v3.8b, v7.8b", + "str s8, [x8, #4]", + "add w23, w21, #0x1 (1)", + "and w23, w23, #0x7", "ldr s3, [x7, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -20949,13 +23474,13 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", - "add w23, w20, #0x6 (6)", - "and w23, w23, #0x7", - "add x23, x28, x23, lsl #4", - "ldr q7, [x23, #1056]", - "add x23, x28, x20, lsl #4", + "add w21, w21, #0x7 (7)", + "and w21, w21, #0x7", + "add w12, w21, #0x6 (6)", + "and w12, w12, #0x7", + "add x12, x28, x12, lsl #4", + "ldr q7, [x12, #1056]", + "add x12, x28, x21, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v7.16b", @@ 
-20971,7 +23496,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "ldr s8, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -20980,9 +23509,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", - "add x12, x28, x20, lsl #4", + "add w21, w21, #0x4 (4)", + "and w21, w21, #0x7", + "add x13, x28, x21, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v2.16b", @@ -20998,7 +23527,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -21007,7 +23540,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add x13, x28, x22, lsl #4", + "add x14, x28, x23, lsl #4", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -21016,9 +23549,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", - "add w22, w22, #0x1 (1)", - "and w22, w22, #0x7", + "fmov s3, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v8.8b, v2.8b, v3.8b", + "str s8, [x8, #60]", + "add w23, w23, #0x1 (1)", + "and w23, w23, #0x7", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21050,7 +23587,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #56]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -21066,8 +23607,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #64]", - "add x22, x28, x22, lsl #4", + "fmov s3, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl 
v8.8b, v2.8b, v3.8b", + "str s8, [x8, #64]", + "add x23, x28, x23, lsl #4", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -21084,7 +23629,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v5.8b, v2.8b, v3.8b", + "str s5, [x8, #4]", "ldr s2, [x7, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21108,7 +23657,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x8, #8]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -21132,7 +23685,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v2.16b", @@ -21148,9 +23705,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #72]", + "add w21, w21, #0x1 (1)", + "and w21, w21, #0x7", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21182,7 +23743,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #76]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #76]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -21198,7 +23763,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #8]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21207,9 +23776,9 @@ "blr 
x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7be0", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7be0", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21232,7 +23801,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #68]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21241,9 +23814,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7bd8", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bd8", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21266,7 +23839,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #72]", "ldr s2, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21275,9 +23852,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7bd0", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bd0", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21300,7 +23877,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #76]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21316,7 +23897,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #8]", "ldr s3, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -21341,7 +23926,11 @@ "blr x0", "ldr 
x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #56]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21365,7 +23954,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #76]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21381,7 +23974,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #8]", "ldr s3, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -21406,7 +24003,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #60]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21430,7 +24031,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #72]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21446,7 +24051,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #8]", "ldr s3, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -21471,7 +24080,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #64]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #64]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21495,7 +24108,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #68]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21504,9 +24121,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7bc8", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bc8", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21529,7 +24146,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #56]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21538,9 +24159,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7bc0", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bc0", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21563,7 +24184,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #60]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21572,9 +24197,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7bb8", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bb8", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21597,7 +24222,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #64]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #64]", "ldr s2, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21606,9 +24235,9 @@ "blr x0", "ldr x30, [sp], #16", "mov 
v2.16b, v0.16b", - "mov w14, #0x7bb0", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7bb0", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21631,7 +24260,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #68]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21640,9 +24273,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7ba8", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7ba8", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21665,7 +24298,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #72]", "ldr s2, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21674,9 +24311,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w14, #0x7ba0", - "movk w14, #0xa7, lsl #16", - "ldr d3, [x14]", + "mov w15, #0x7ba0", + "movk w15, #0xa7, lsl #16", + "ldr d3, [x15]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -21699,7 +24336,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #76]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21710,8 +24351,8 @@ "mov v2.16b, v0.16b", "ldr q3, [x28, #3472]", "eor v2.16b, v2.16b, v3.16b", - "add x14, x28, x20, lsl #4", - "ldr q3, [x14, #1056]", + "add x15, x28, x21, lsl #4", + "ldr q3, [x15, #1056]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v3.16b", @@ -21727,10 +24368,14 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - 
"str s4, [x8, #88]", - "mov w14, #0x7b98", - "movk w14, #0xa7, lsl #16", - "ldr d4, [x14]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #88]", + "mov w15, #0x7b98", + "movk w15, #0xa7, lsl #16", + "ldr d4, [x15]", "str x30, [sp, #-16]!", "fmov d0, d4", "ldr x0, [x28, #1608]", @@ -21754,7 +24399,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #92]", + "fmov s5, w20", + "fcmeq v6.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v6.8b, v2.8b, v5.8b", + "str s6, [x8, #92]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21765,9 +24414,9 @@ "mov v2.16b, v0.16b", "ldr q5, [x28, #3472]", "eor v2.16b, v2.16b, v5.16b", - "mov w14, #0x7b90", - "movk w14, #0xa7, lsl #16", - "ldr d5, [x14]", + "mov w15, #0x7b90", + "movk w15, #0xa7, lsl #16", + "ldr d5, [x15]", "str x30, [sp, #-16]!", "fmov d0, d5", "ldr x0, [x28, #1608]", @@ -21791,11 +24440,15 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #84]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #84]", "strb wzr, [x28, #1049]", - "mov w14, #0x7b88", - "movk w14, #0xa7, lsl #16", - "ldr d6, [x14]", + "mov w15, #0x7b88", + "movk w15, #0xa7, lsl #16", + "ldr d6, [x15]", "str x30, [sp, #-16]!", "fmov d0, d6", "ldr x0, [x28, #1608]", @@ -21818,7 +24471,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #96]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #96]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21829,9 +24486,9 @@ "mov v2.16b, v0.16b", "ldr q6, [x28, #3472]", "eor v2.16b, v2.16b, v6.16b", - "mov w15, #0x7b80", - "movk w15, #0xa7, lsl #16", - "ldr d6, [x15]", + "mov w16, #0x7b80", + "movk w16, #0xa7, lsl #16", + "ldr d6, [x16]", "str x30, [sp, #-16]!", "fmov d0, d6", "ldr x0, [x28, #1608]", @@ 
-21854,10 +24511,14 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #80]", - "mov w16, #0x7b78", - "movk w16, #0xa7, lsl #16", - "ldr d6, [x16]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #80]", + "mov w17, #0x7b78", + "movk w17, #0xa7, lsl #16", + "ldr d6, [x17]", "str x30, [sp, #-16]!", "fmov d0, d6", "ldr x0, [x28, #1608]", @@ -21880,7 +24541,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #100]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #100]", "ldr s2, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21896,7 +24561,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s6, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v6.8b, v2.8b, v6.8b", + "bsl v7.8b, v2.8b, v6.8b", + "str s7, [x8, #56]", "ldr s2, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -21905,7 +24574,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr d6, [x14]", + "ldr d6, [x15]", "str x30, [sp, #-16]!", "fmov d0, d6", "ldr x0, [x28, #1608]", @@ -21928,7 +24597,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #60]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #60]", "ldr s6, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -21953,7 +24626,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #64]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #64]", "strb wzr, [x28, #1049]", "ldr q7, [x28, #3472]", "eor v7.16b, v6.16b, v7.16b", @@ -21977,7 +24654,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #68]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #68]", "strb 
wzr, [x28, #1049]", "ldr q7, [x28, #3472]", "eor v2.16b, v2.16b, v7.16b", @@ -21996,7 +24677,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #72]", + "fmov s5, w20", + "fcmeq v7.4s, v2.4s, v2.4s", + "orr v5.8b, v2.8b, v5.8b", + "bsl v7.8b, v2.8b, v5.8b", + "str s7, [x8, #72]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22007,7 +24692,7 @@ "mov v2.16b, v0.16b", "ldr q5, [x28, #3472]", "eor v5.16b, v2.16b, v5.16b", - "ldr d7, [x15]", + "ldr d7, [x16]", "str x30, [sp, #-16]!", "fmov d0, d7", "ldr x0, [x28, #1608]", @@ -22030,8 +24715,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #76]", - "ldr d7, [x16]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #76]", + "ldr d7, [x17]", "str x30, [sp, #-16]!", "fmov d0, d7", "ldr x0, [x28, #1608]", @@ -22054,7 +24743,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #56]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x8, #56]", "ldur s2, [x4, #-28]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22086,7 +24779,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-28]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-28]", "ldur s2, [x4, #-24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22118,7 +24815,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-24]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-24]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22150,7 +24851,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-20]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-20]", "ldur s2, [x4, 
#-16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22182,7 +24887,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-16]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-16]", "ldur s2, [x4, #-12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22214,7 +24923,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-12]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-12]", "ldur s2, [x4, #-8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22246,7 +24959,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-8]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-8]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22278,7 +24995,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x4, #-4]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "stur s8, [x4, #-4]", "ldr s2, [x4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22310,7 +25031,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x4]", "ldr s2, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22342,7 +25067,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #4]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x4, #4]", "ldr s2, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22374,7 +25103,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #8]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x4, #8]", "ldr s2, [x8, #96]", 
"str x30, [sp, #-16]!", "fmov s0, s2", @@ -22406,7 +25139,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #12]", + "fmov s7, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v7.8b, v2.8b, v7.8b", + "bsl v8.8b, v2.8b, v7.8b", + "str s8, [x4, #12]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22438,25 +25175,29 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #16]", - "strb w20, [x28, #1051]", - "str q6, [x23, #1056]", - "str q4, [x21, #1056]", - "str q3, [x13, #1056]", - "str q5, [x22, #1056]", - "str q2, [x12, #1056]", - "ldrb w21, [x28, #1202]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #16]", + "strb w21, [x28, #1051]", + "str q6, [x12, #1056]", + "str q4, [x22, #1056]", + "str q3, [x14, #1056]", + "str q5, [x23, #1056]", + "str q2, [x13, #1056]", + "ldrb w20, [x28, #1202]", "mov w22, #0x8", - "sub w20, w22, w20", + "sub w21, w22, w21", "mov w22, #0xf8f8", - "lsr w20, w22, w20", - "bic w20, w21, w20", + "lsr w21, w22, w21", + "bic w20, w20, w21", "strb w20, [x28, #1202]" ] }, "Block8": { "x86InstructionCount": 229, - "ExpectedInstructionCount": 1903, + "ExpectedInstructionCount": 2052, "x86Insts": [ "movzx eax,word [esi + edx*0x8]", "fld dword [esi + edx*0x8 + 0x4]", @@ -22693,7 +25434,12 @@ "ldrh w4, [x20]", "add w20, w10, w5, lsl #3", "ldr s2, [x20, #4]", - "str s2, [x8, #36]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #36]", "ldr w10, [x8, #456]", "ldr s2, [x8, #156]", "str x30, [sp, #-16]!", @@ -22705,16 +25451,16 @@ "mov v2.16b, v0.16b", "uxth w4, w4", "mov x7, x4", - "ldr w20, [x8, #484]", - "mul w7, w7, w20", + "ldr w21, [x8, #484]", + "mul w7, w7, w21", "add w4, w4, w4, lsl #1", "add w4, w4, w4", "add w4, w4, w4", - "add w20, w4, #0x8 (8)", - "add w11, w20, w10", + "add w21, w4, #0x8 (8)", + "add w11, w21, w10", "str 
w11, [x8, #16]", - "add w20, w4, w10", - "ldr s3, [x20, #4]", + "add w21, w4, w10", + "ldr s3, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -22738,8 +25484,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w20, w4, w10", - "ldr s4, [x20]", + "add w21, w4, w10", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -22818,7 +25564,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #208]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #208]", "ldr s2, [x8, #168]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22827,8 +25577,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w4, w10", - "ldr s3, [x20, #4]", + "add w21, w4, w10", + "ldr s3, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -22852,8 +25602,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w20, w4, w10", - "ldr s4, [x20]", + "add w21, w4, w10", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -22932,7 +25682,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #212]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #212]", "ldr s2, [x8, #180]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -22941,8 +25695,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w4, w10", - "ldr s3, [x20, #4]", + "add w21, w4, w10", + "ldr s3, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -22966,8 +25720,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w20, w4, w10", - "ldr s4, [x20]", + "add w21, w4, w10", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -23017,8 +25771,8 @@ "ldr x30, [sp], #16", "mov v3.16b, v0.16b", "ldr w10, [x8, #56]", - "add w20, w10, #0x8 (8)", - 
"add w11, w20, w4", + "add w21, w10, #0x8 (8)", + "add w11, w21, w4", "str w11, [x8, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -23051,7 +25805,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #216]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #216]", "ldr s2, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23060,8 +25818,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w4, w6", - "ldr s3, [x20]", + "add w21, w4, w6", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -23085,8 +25843,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w20, w4, w6", - "ldr s5, [x20, #4]", + "add w21, w4, w6", + "ldr s5, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -23118,8 +25876,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w20, w4, w6", - "ldr s6, [x20, #8]", + "add w21, w4, w6", + "ldr s6, [x21, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23151,7 +25909,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #232]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x8, #232]", "ldr s3, [x8, #112]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -23160,8 +25922,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w20, w4, w6", - "ldr s6, [x20]", + "add w21, w4, w6", + "ldr s6, [x21]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23185,8 +25947,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w20, w4, w6", - "ldr s8, [x20, #4]", + "add w21, w4, w6", + "ldr s8, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23218,8 +25980,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "add w20, w4, w6", - "ldr s9, [x20, #8]", + "add w21, w4, w6", + "ldr s9, 
[x21, #8]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -23251,7 +26013,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #236]", + "fmov s8, w20", + "fcmeq v9.4s, v6.4s, v6.4s", + "orr v8.8b, v6.8b, v8.8b", + "bsl v9.8b, v6.8b, v8.8b", + "str s9, [x8, #236]", "ldr s6, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -23260,8 +26026,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w20, w4, w6", - "ldr s8, [x20]", + "add w21, w4, w6", + "ldr s8, [x21]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23285,8 +26051,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "add w20, w4, w6", - "ldr s9, [x20, #4]", + "add w21, w4, w6", + "ldr s9, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -23318,8 +26084,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "add w20, w4, w6", - "ldr s9, [x20, #8]", + "add w21, w4, w6", + "ldr s9, [x21, #8]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -23350,9 +26116,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #240]", - "add w20, w10, w4", - "ldr s6, [x20, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v6.4s, v6.4s", + "orr v8.8b, v6.8b, v8.8b", + "bsl v9.8b, v6.8b, v8.8b", + "str s9, [x8, #240]", + "add w21, w10, w4", + "ldr s6, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23368,8 +26138,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w20, w10, w4", - "ldr s8, [x20]", + "add w21, w10, w4", + "ldr s8, [x21]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23424,9 +26194,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #88]", - "add w20, w10, w4", - "ldr s6, [x20, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v6.4s, v6.4s", + "orr v8.8b, v6.8b, v8.8b", + "bsl v9.8b, v6.8b, v8.8b", + "str s9, [x8, #88]", + "add w21, w10, w4", + "ldr s6, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", 
"ldr x0, [x28, #1592]", @@ -23442,8 +26216,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w20, w10, w4", - "ldr s8, [x20]", + "add w21, w10, w4", + "ldr s8, [x21]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23506,7 +26280,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #92]", + "fmov s8, w20", + "fcmeq v9.4s, v6.4s, v6.4s", + "orr v8.8b, v6.8b, v8.8b", + "bsl v9.8b, v6.8b, v8.8b", + "str s9, [x8, #92]", "ldr s6, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -23515,8 +26293,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w20, w10, w4", - "ldr s8, [x20, #4]", + "add w21, w10, w4", + "ldr s8, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23540,8 +26318,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v8.16b, v0.16b", - "add w20, w10, w4", - "ldr s9, [x20]", + "add w21, w10, w4", + "ldr s9, [x21]", "str x30, [sp, #-16]!", "fmov s0, s9", "ldr x0, [x28, #1592]", @@ -23591,8 +26369,8 @@ "ldr x30, [sp], #16", "mov v8.16b, v0.16b", "ldr w10, [x8, #32]", - "add w20, w10, #0x4 (4)", - "add w11, w20, w4", + "add w21, w10, #0x4 (4)", + "add w11, w21, w4", "str w11, [x8, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", @@ -23602,8 +26380,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w20, w10, #0x8 (8)", - "add w11, w20, w4", + "add w21, w10, #0x8 (8)", + "add w11, w21, w4", "str w11, [x8, #188]", "ldr w11, [x8, #16]", "str x30, [sp, #-16]!", @@ -23613,9 +26391,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #96]", - "add w20, w10, w4", - "ldr s6, [x20]", + "fmov s8, w20", + "fcmeq v9.4s, v6.4s, v6.4s", + "orr v8.8b, v6.8b, v8.8b", + "bsl v9.8b, v6.8b, v8.8b", + "str s9, [x8, #96]", + "add w21, w10, w4", + "ldr s6, [x21]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23691,9 +26473,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #76]", - "add w20, w10, w4", - "ldr s2, [x20]", + 
"fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #76]", + "add w21, w10, w4", + "ldr s2, [x21]", "str x30, [sp, #-16]!", "fmov s0, s2", "ldr x0, [x28, #1592]", @@ -23773,7 +26559,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #80]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #80]", "ldr s2, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23782,8 +26572,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w10, w4", - "ldr s3, [x20]", + "add w21, w10, w4", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -23872,7 +26662,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #84]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #84]", "ldr s2, [x8, #208]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23905,7 +26699,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #192]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #192]", "ldr s2, [x8, #212]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23929,7 +26727,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #196]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #196]", "ldr s2, [x8, #216]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23953,7 +26755,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #200]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #200]", "ldr s2, [x8, #192]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -23962,8 +26768,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", - "ldr 
s4, [x20]", + "add w21, w7, w4", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -23979,7 +26785,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -23987,11 +26793,15 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21]", "add w5, w5, #0x1 (1)", - "ldr w20, [x8, #28]", - "eor x27, x5, x20", - "subs w26, w5, w20", + "ldr w21, [x8, #28]", + "eor x27, x5, x21", + "subs w26, w5, w21", "ldr s2, [x8, #196]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24000,8 +26810,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", - "ldr s4, [x20, #4]", + "add w21, w7, w4", + "ldr s4, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24017,7 +26827,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24025,9 +26835,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20, #4]", - "add w20, w7, #0x8 (8)", - "add w4, w20, w4", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21, #4]", + "add w21, w7, #0x8 (8)", + "add w4, w21, w4", "ldr s2, [x4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24059,7 +26873,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4]", "ldr w4, [x8, #476]", "ldr s2, [x8, #232]", "str x30, [sp, #-16]!", @@ -24084,7 +26902,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #220]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + 
"bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #220]", "ldr s2, [x8, #236]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24108,7 +26930,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #224]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #224]", "ldr s2, [x8, #240]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24132,7 +26958,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #228]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #228]", "ldr s2, [x8, #220]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24141,8 +26971,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", - "ldr s4, [x20]", + "add w21, w7, w9", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24158,7 +26988,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", + "add w21, w7, w9", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24166,7 +26996,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21]", "ldr s2, [x8, #224]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24175,8 +27009,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", - "ldr s4, [x20, #4]", + "add w21, w7, w9", + "ldr s4, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24192,7 +27026,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", + "add w21, w7, w9", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24200,7 +27034,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20, #4]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + 
"str s5, [x21, #4]", "ldr s2, [x8, #228]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24209,8 +27047,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", - "ldr s4, [x20, #8]", + "add w21, w7, w9", + "ldr s4, [x21, #8]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24226,7 +27064,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w9", + "add w21, w7, w9", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24234,7 +27072,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21, #8]", "ldr s2, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24258,7 +27100,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #60]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #60]", "ldr s2, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24282,7 +27128,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #64]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #64]", "ldr s2, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24306,9 +27156,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #68]", - "add w20, w7, w4", - "ldr s2, [x20]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #68]", + "add w21, w7, w4", + "ldr s2, [x21]", "str x30, [sp, #-16]!", "fmov s0, s2", "ldr x0, [x28, #1592]", @@ -24332,7 +27186,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24340,7 +27194,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, 
[x20]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21]", "ldr s2, [x8, #64]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24349,8 +27207,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", - "ldr s4, [x20, #4]", + "add w21, w7, w4", + "ldr s4, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24366,7 +27224,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24374,9 +27232,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20, #4]", - "add w20, w7, #0x8 (8)", - "add w4, w20, w4", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x21, #4]", + "add w21, w7, #0x8 (8)", + "add w4, w21, w4", "ldr s2, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24408,7 +27270,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4]", "ldr w4, [x8, #480]", "ldr s2, [x8, #76]", "str x30, [sp, #-16]!", @@ -24433,7 +27299,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #44]", "ldr s2, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24457,7 +27327,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #48]", "ldr s2, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24481,9 +27355,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #52]", - "add w20, w7, w4", - "ldr s2, [x20]", + "fmov s3, w20", 
+ "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #52]", + "add w21, w7, w4", + "ldr s2, [x21]", "str x30, [sp, #-16]!", "fmov s0, s2", "ldr x0, [x28, #1592]", @@ -24507,7 +27385,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24515,7 +27393,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x21]", "ldr s2, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24524,8 +27406,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", - "ldr s3, [x20, #4]", + "add w21, w7, w4", + "ldr s3, [x21, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -24541,7 +27423,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "add w20, w7, w4", + "add w21, w7, w4", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -24549,9 +27431,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x20, #4]", - "add w20, w7, #0x8 (8)", - "add w7, w20, w4", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x21, #4]", + "add w21, w7, #0x8 (8)", + "add w7, w21, w4", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24583,7 +27469,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x7]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x7]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x8", @@ -24596,7 +27486,7 @@ }, "Block9": { "x86InstructionCount": 260, - "ExpectedInstructionCount": 80, + "ExpectedInstructionCount": 89, "x86Insts": [ "fld dword [edi]", "fmul st0", @@ -24883,7 +27773,12 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s2, s0", - "str s2, [x8, #12]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24892,9 +27787,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0xf928", - "movk w20, #0xa2, lsl #16", - "ldr d3, [x20]", + "mov w21, #0xf928", + "movk w21, #0xa2, lsl #16", + "ldr d3, [x21]", "str x30, [sp, #-16]!", "fmov d0, d3", "ldr x0, [x28, #1608]", @@ -24917,7 +27812,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -24944,7 +27843,7 @@ }, "Block10": { "x86InstructionCount": 206, - "ExpectedInstructionCount": 183, + "ExpectedInstructionCount": 272, "x86Insts": [ "fld dword [0x00b42a74]", "push ecx", @@ -25160,77 +28059,126 @@ "str w7, [x8, #-4]!", "mov w20, #0x2a20", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "mov w21, #0x400000", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "add w7, w8, #0x48 (72)", "mov w20, #0x2a78", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a24", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a7c", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a28", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a68", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a2c", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, 
v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a6c", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a30", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a70", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a34", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a5c", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a38", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a60", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a3c", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a64", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a40", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a50", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a44", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a54", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a48", "movk w20, #0xb4, lsl #16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "mov w20, #0x2a58", "movk w20, #0xb4, lsl #16", "ldr s2, [x20]", "mov w20, #0x2a4c", "movk w20, #0xb4, lsl 
#16", - "str s2, [x20]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x20]", "ldrb w20, [x28, #1051]", - "add x21, x28, x20, lsl #4", - "ldr q2, [x21, #1056]", + "add x22, x28, x20, lsl #4", + "ldr q2, [x22, #1056]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25238,7 +28186,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #72]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #72]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25246,7 +28198,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #88]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #88]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25254,11 +28210,15 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #104]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #104]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", - "add x21, x28, x20, lsl #4", - "ldr q2, [x21, #1056]", + "add x22, x28, x20, lsl #4", + "ldr q2, [x22, #1056]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25266,7 +28226,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #76]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #76]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25274,7 +28238,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #80]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #80]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, 
#1624]", @@ -25282,7 +28250,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #84]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #84]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25290,7 +28262,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #92]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #92]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25298,7 +28274,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #96]", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #96]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -25306,7 +28286,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #100]", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #100]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", "ldr s2, [x8, #4]", @@ -25324,18 +28308,22 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8]", - "mov w22, #0xc5", - "movk w22, #0x1, lsl #16", - "str w22, [x8, #-4]!", + "fmov s4, w21", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8]", + "mov w21, #0xc5", + "movk w21, #0x1, lsl #16", + "str w21, [x8, #-4]!", "strb w20, [x28, #1051]", - "str q2, [x21, #1056]", - "ldrb w21, [x28, #1202]", + "str q2, [x22, #1056]", + "ldrb w22, [x28, #1202]", "mov w23, #0x8", "sub w20, w23, w20", "mov w23, #0xe0e0", "lsr w20, w23, w20", - "bic w20, w21, w20", + "bic w20, w22, w20", "strb w20, [x28, #1202]" ] } diff --git a/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json b/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json index f2ff6b0077..5d6136461c 
100644 --- a/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json +++ b/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json @@ -14,7 +14,7 @@ "Instructions": { "Block1": { "x86InstructionCount": 520, - "ExpectedInstructionCount": 4570, + "ExpectedInstructionCount": 5003, "x86Insts": [ "sub esp,0x88", "fld dword [ecx + 0x4]", @@ -662,7 +662,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #8]", + "mov w20, #0x400000", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #8]", "ldr s9, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -694,7 +699,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8]", "ldr s9, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -734,7 +743,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #4]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #4]", "ldr s9, [x8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -774,7 +787,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8]", "ldr s9, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -798,7 +815,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #128]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #128]", "ldr s9, [x8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -822,7 +843,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #120]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #120]", "strb 
wzr, [x28, #1049]", "ldr s9, [x8, #4]", "str x30, [sp, #-16]!", @@ -847,7 +872,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #96]", + "fmov s9, w20", + "fcmeq v10.4s, v7.4s, v7.4s", + "orr v9.8b, v7.8b, v9.8b", + "bsl v10.8b, v7.8b, v9.8b", + "str s10, [x8, #96]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -871,7 +900,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #64]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #64]", "ldr s7, [x4, #68]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -951,7 +984,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #8]", "ldr s9, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -983,7 +1020,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8]", "ldr s9, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -1023,7 +1064,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #4]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #4]", "ldr s9, [x8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -1063,7 +1108,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8]", "ldr s9, [x8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -1087,7 +1136,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #36]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #36]", "ldr s9, [x8, #4]", 
"str x30, [sp, #-16]!", "fmov s0, s9", @@ -1111,7 +1164,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8, #52]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8, #52]", "ldr s9, [x8]", "str x30, [sp, #-16]!", "fmov s0, s9", @@ -1135,7 +1192,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v7.4s, v7.4s", + "orr v9.8b, v7.8b, v9.8b", + "bsl v10.8b, v7.8b, v9.8b", + "str s10, [x8, #92]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1159,7 +1220,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #100]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #100]", "ldr s7, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1215,7 +1280,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v3.16b", @@ -1255,7 +1324,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v4.16b", @@ -1295,7 +1368,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1351,7 +1428,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x8, #16]", "str x30, 
[sp, #-16]!", "fmov s0, s8", @@ -1407,7 +1488,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1464,7 +1549,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1496,7 +1585,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #88]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #88]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1528,7 +1621,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #48]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #48]", "ldr s7, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1560,7 +1657,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #104]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #104]", "ldr s7, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1592,7 +1693,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #80]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #80]", "ldr s7, [x4, #76]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1648,7 +1753,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1704,7 
+1813,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1760,7 +1873,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #108]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1816,7 +1933,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s9, s0", - "str s9, [x8]", + "fmov s10, w20", + "fcmeq v11.4s, v9.4s, v9.4s", + "orr v10.8b, v9.8b, v10.8b", + "bsl v11.8b, v9.8b, v10.8b", + "str s11, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v8.16b", "mov v1.16b, v4.16b", @@ -1848,7 +1969,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -1889,7 +2014,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "ldr s7, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1921,7 +2050,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #68]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #68]", "ldr s7, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1953,7 +2086,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #76]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #76]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -1985,7 +2122,11 @@ "blr x0", "ldr 
x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #116]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #116]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2017,7 +2158,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #132]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #132]", "ldr s7, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2073,7 +2218,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -2113,7 +2262,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -2153,7 +2306,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2209,7 +2366,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -2249,7 +2410,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "ldr s8, [x8]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2290,7 +2455,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2322,7 +2491,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2354,7 +2527,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #40]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #40]", "ldr s7, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2386,7 +2563,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #24]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #24]", "ldr s7, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2418,7 +2599,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #56]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #56]", "ldr s7, [x4, #84]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2474,7 +2659,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -2514,7 +2703,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "ldr s8, [x8]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -2555,7 +2748,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, 
#8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #116]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -2611,7 +2808,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -2651,7 +2852,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v6.16b", @@ -2691,7 +2896,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -2723,7 +2932,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #44]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #44]", "ldr s5, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -2755,7 +2968,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #60]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #60]", "ldr s5, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -2787,7 +3004,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #108]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #108]", "ldr s5, [x8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -2819,7 +3040,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #84]", + "fmov s6, w20", 
+ "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #84]", "ldr s5, [x4, #24]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -2915,7 +3140,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #12]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #12]", "strb wzr, [x28, #1049]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", @@ -3037,7 +3266,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v4.16b", @@ -3070,7 +3303,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8]", "ldr s6, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -3102,7 +3339,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #8]", "ldr s6, [x8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -3126,7 +3367,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #112]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #112]", "ldr s6, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -3158,7 +3403,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", "ldr s6, [x8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -3182,7 +3431,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #72]", + "fmov s6, w20", + "fcmeq v7.4s, 
v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #72]", "ldr s5, [x4, #92]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -3262,7 +3515,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #12]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -3383,7 +3640,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #4]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #4]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -3456,7 +3717,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v3.16b", @@ -3472,7 +3737,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #16]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #16]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -3504,7 +3773,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #124]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #124]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v4.16b", @@ -3616,7 +3889,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -3632,7 +3909,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4]", + "fmov s8, w20", + "fcmeq v9.4s, 
v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -3656,7 +3937,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -3673,7 +3958,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #8]", + "fmov s6, w20", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x4, #8]", "ldr s4, [x8]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -3697,7 +3986,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #12]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #12]", "ldr s4, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -3809,7 +4102,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x4, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x4, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -3825,7 +4122,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x4, #20]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x4, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v4.16b", @@ -3841,7 +4142,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #24]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #24]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -3858,7 +4163,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #28]", + "fmov s5, w20", + 
"fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #28]", "ldr s4, [x8, #96]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -3978,7 +4287,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -4018,7 +4331,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #32]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #32]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -4034,7 +4351,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #36]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #36]", "strb wzr, [x28, #1049]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", @@ -4059,7 +4380,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #40]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #40]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v6.16b", @@ -4075,7 +4400,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #44]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #44]", "ldr s4, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4195,7 +4524,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -4227,7 +4560,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, 
#48]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #48]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -4251,7 +4588,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #52]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #52]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -4267,7 +4608,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #56]", + "fmov s6, w20", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x4, #56]", "ldr s4, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4291,7 +4636,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #60]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #60]", "ldr s4, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4395,7 +4744,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -4411,7 +4764,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #64]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #64]", "ldr s7, [x8]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -4435,7 +4792,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #68]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #68]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -4452,7 +4813,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #72]", 
+ "fmov s6, w20", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x4, #72]", "ldr s4, [x8]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4476,7 +4841,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #76]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #76]", "ldr s4, [x8, #36]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4588,7 +4957,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x4, #80]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x4, #80]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -4604,7 +4977,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x4, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x4, #84]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v4.16b", @@ -4620,7 +4997,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #88]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #88]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -4637,7 +5018,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #92]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #92]", "ldr s4, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4749,7 +5134,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #4]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -4789,7 +5178,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - 
"str s7, [x4, #96]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #96]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -4805,7 +5198,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x4, #100]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x4, #100]", "strb wzr, [x28, #1049]", "ldr s7, [x8, #4]", "str x30, [sp, #-16]!", @@ -4830,7 +5227,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #104]", + "fmov s7, w20", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v7.8b, v4.8b, v7.8b", + "bsl v8.8b, v4.8b, v7.8b", + "str s8, [x4, #104]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v6.16b", @@ -4846,7 +5247,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #108]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #108]", "ldr s4, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4878,7 +5283,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #12]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #12]", "ldr s4, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4910,7 +5319,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #8]", "ldr s4, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -4974,7 +5387,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #4]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #4]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v3.16b", @@ -5014,7 +5431,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s3, s0", - "str s3, [x4, #112]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #112]", "ldr s3, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -5046,7 +5467,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #116]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #116]", "ldr s3, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -5070,7 +5495,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #120]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #120]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -5102,7 +5531,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #124]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #124]", "adds w26, w8, #0x88 (136)", "cfinv", "mov x27, x8", @@ -5112,7 +5545,7 @@ }, "Block2": { "x86InstructionCount": 434, - "ExpectedInstructionCount": 3932, + "ExpectedInstructionCount": 4353, "x86Insts": [ "sub esp,0x90", "fld dword [ecx + 0x4]", @@ -5670,7 +6103,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "mov w20, #0x400000", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -5702,7 +6140,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #4]", "ldr s7, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -5758,7 +6200,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, 
v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x4, #32]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -5790,7 +6236,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x4, #36]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -5822,7 +6272,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -5838,7 +6292,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #84]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #84]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -5862,7 +6320,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #100]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #100]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -5879,7 +6341,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #60]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "str s8, [x8, #60]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -5903,7 +6369,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #68]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #68]", "ldr s5, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -5935,7 +6405,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #96]", + "fmov s6, w20", + "fcmeq v7.4s, 
v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #96]", "ldr s5, [x8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -5967,7 +6441,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #120]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #120]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -5999,7 +6477,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #80]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #80]", "ldr s5, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6031,7 +6513,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #40]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #40]", "ldr s5, [x4, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6111,7 +6597,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -6143,7 +6633,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #4]", "ldr s7, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -6199,7 +6693,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x4, #40]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6231,7 +6729,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, 
v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x4, #44]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6263,7 +6765,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -6279,7 +6785,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #116]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #116]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6303,7 +6813,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #132]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #132]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -6320,7 +6834,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #32]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "str s8, [x8, #32]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6344,7 +6862,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #36]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #36]", "ldr s5, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6424,7 +6946,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #104]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #104]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -6457,7 +6983,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #136]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, 
v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #136]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6537,7 +7067,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #128]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #128]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -6571,7 +7105,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #48]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #48]", "ldr s5, [x4, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6651,7 +7189,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #20]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -6683,7 +7225,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #4]", "ldr s7, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -6739,7 +7285,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x4, #48]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6771,7 +7321,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x4, #52]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6803,7 +7357,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + 
"orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -6819,7 +7377,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #92]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #92]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -6843,7 +7405,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #108]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #108]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -6860,7 +7426,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #76]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "str s8, [x8, #76]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6884,7 +7454,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #52]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #52]", "ldr s5, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -6956,7 +7530,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #56]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #56]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -6980,7 +7558,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #64]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #64]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7052,7 +7634,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #72]", + "fmov s8, w20", + "fcmeq 
v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #72]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v5.16b", @@ -7076,7 +7662,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #88]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #88]", "ldr s5, [x4, #88]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7156,7 +7746,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #8]", "ldr s7, [x4, #28]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -7188,7 +7782,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #4]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #4]", "ldr s7, [x4, #120]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -7244,7 +7842,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #16]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #16]", "ldr s8, [x4, #56]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7276,7 +7878,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8]", "ldr s8, [x4, #60]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7308,7 +7914,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #12]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "mov v1.16b, v5.16b", @@ -7324,7 +7934,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #124]", + "fmov s9, w20", + "fcmeq 
v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #124]", "ldr s8, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s8", @@ -7348,7 +7962,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x8, #140]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #140]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -7365,7 +7983,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #44]", + "fmov s7, w20", + "fcmeq v8.4s, v5.4s, v5.4s", + "orr v7.8b, v5.8b, v7.8b", + "bsl v8.8b, v5.8b, v7.8b", + "str s8, [x8, #44]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7389,7 +8011,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #112]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #112]", "ldr s5, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7469,7 +8095,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #28]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #28]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -7503,7 +8133,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "ldr s5, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7583,7 +8217,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8]", + "fmov s8, w20", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -7664,7 +8302,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #24]", + "fmov s6, 
w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #24]", "ldr s5, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7696,7 +8338,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #8]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #8]", "ldr s5, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7728,7 +8374,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #4]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #4]", "ldr s5, [x8, #128]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -7776,7 +8426,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldr s6, [x8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -7808,7 +8462,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8]", "strb wzr, [x28, #1049]", "ldr s6, [x8, #48]", "str x30, [sp, #-16]!", @@ -7833,7 +8491,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #12]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x8, #12]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -7850,7 +8512,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #96]", + "fmov s6, w20", + "fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x4, #96]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7882,7 +8548,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #100]", + "fmov s6, w20", + 
"fcmeq v7.4s, v3.4s, v3.4s", + "orr v6.8b, v3.8b, v6.8b", + "bsl v7.8b, v3.8b, v6.8b", + "str s7, [x4, #100]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -7899,7 +8569,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #104]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #104]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7931,7 +8605,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #108]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #108]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7963,7 +8641,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #112]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #112]", "ldr s3, [x8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -7995,7 +8677,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #116]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #116]", "ldr s3, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8027,7 +8713,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #120]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #120]", "ldr s3, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8059,7 +8749,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #124]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #124]", "ldr s3, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8139,7 +8833,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #8]", + "fmov s6, w20", + 
"fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #8]", "ldr s5, [x8, #120]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -8171,7 +8869,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #4]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #4]", "ldr s5, [x8, #28]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -8227,7 +8929,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldr s6, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -8259,7 +8965,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8]", "ldr s6, [x8, #136]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -8291,7 +9001,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #12]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v3.16b", @@ -8307,7 +9021,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x4, #64]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x4, #64]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -8331,7 +9049,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x4, #68]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x4, #68]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -8348,7 +9070,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #72]", + "fmov s5, w20", + "fcmeq 
v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x4, #72]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8372,7 +9098,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #76]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #76]", "ldr s3, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8404,7 +9134,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #80]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #80]", "ldr s3, [x8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8436,7 +9170,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #84]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #84]", "ldr s3, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8468,7 +9206,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #88]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #88]", "ldr s3, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8500,7 +9242,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x4, #92]", + "fmov s4, w20", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x4, #92]", "ldr s3, [x8, #32]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -8572,7 +9318,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", "mov v1.16b, v4.16b", @@ -8660,7 +9410,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8]", + "fmov s7, w20", + "fcmeq v8.4s, v6.4s, v6.4s", 
+ "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v4.16b", @@ -8684,7 +9438,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x8, #12]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8740,7 +9498,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #24]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #24]", "ldr s4, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8772,7 +9534,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #8]", "ldr s4, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8804,7 +9570,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #4]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #4]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8828,7 +9598,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #32]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #32]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8852,7 +9626,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x4, #36]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x4, #36]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -8876,7 +9654,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #40]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, 
v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #40]", "ldr s2, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8900,7 +9682,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #44]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8932,7 +9718,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #48]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #48]", "ldr s2, [x8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8964,7 +9754,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #52]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #52]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -8996,7 +9790,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #56]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #56]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9028,7 +9826,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #60]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #60]", "ldr s2, [x8, #92]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9108,7 +9910,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #8]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #8]", "ldr s4, [x8, #100]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -9140,7 +9946,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #4]", + "fmov s5, w20", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl 
v6.8b, v4.8b, v5.8b", + "str s6, [x8, #4]", "ldr s4, [x8, #124]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -9196,7 +10006,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #16]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #16]", "ldr s5, [x8, #116]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9228,7 +10042,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8]", "ldr s5, [x8, #132]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9260,7 +10078,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #12]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v2.16b", @@ -9276,7 +10098,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4]", "ldr s5, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -9300,7 +10126,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x4, #4]", + "fmov s6, w20", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x4, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -9317,7 +10147,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #8]", + "fmov s4, w20", + "fcmeq v5.4s, v2.4s, v2.4s", + "orr v4.8b, v2.8b, v4.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str s5, [x4, #8]", "ldr s2, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9341,7 +10175,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, 
v3.8b", + "str s4, [x4, #12]", "ldr s2, [x8, #8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9373,7 +10211,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #16]", "ldr s2, [x8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9405,7 +10247,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #20]", "ldr s2, [x8, #12]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9437,7 +10283,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #24]", "ldr s2, [x8, #4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -9469,7 +10319,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4, #28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, #28]", "mvn w27, w8", "adds w26, w8, #0x90 (144)", "cfinv", @@ -10288,7 +11142,7 @@ }, "Block4": { "x86InstructionCount": 351, - "ExpectedInstructionCount": 2809, + "ExpectedInstructionCount": 3062, "x86Insts": [ "mov ebp,dword [esp + 0x64]", "fadd dword [ebp + 0x8]", @@ -10688,7 +11542,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "mov w22, #0x400000", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #108]", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10736,7 +11595,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #104]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #104]", "ldr s2, [x8, #56]", "str x30, [sp, 
#-16]!", "fmov s0, s2", @@ -10784,7 +11647,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10832,7 +11699,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #40]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #40]", "ldur s2, [x9, #-8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -10855,9 +11726,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x23, x28, x22, lsl #4", + "add w23, w20, #0x7 (7)", + "and w23, w23, #0x7", + "add x12, x28, x23, lsl #4", "ldr q4, [x28, #3472]", "eor v3.16b, v3.16b, v4.16b", "str x30, [sp, #-16]!", @@ -10867,7 +11738,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #60]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #60]", "ldur s3, [x11, #-8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -10900,9 +11775,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w22, w22, #0x7 (7)", - "and w22, w22, #0x7", - "add x12, x28, x22, lsl #4", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", + "add x13, x28, x23, lsl #4", "ldr q5, [x28, #3472]", "eor v4.16b, v4.16b, v5.16b", "ldur s5, [x5, #-4]", @@ -10929,8 +11804,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w22, w22, #0x7 (7)", - "and w22, w22, #0x7", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", "ldur s6, [x5, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -10939,7 +11814,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add x13, x28, x22, lsl #4", + "add x14, x28, x23, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v6.16b", @@ -10955,7 +11830,11 @@ 
"blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "ldur s5, [x5, #-4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -10987,7 +11866,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #28]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #28]", "ldr s5, [x11]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11019,7 +11902,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #68]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #68]", "ldr s5, [x11, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11053,7 +11940,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #80]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #80]", "ldr s5, [x11]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11085,7 +11976,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #84]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #84]", "ldr s5, [x5, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11117,7 +12012,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #92]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #92]", "ldur s5, [x7, #-8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11150,8 +12049,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w22, w22, #0x7 (7)", - "and w22, w22, #0x7", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", "ldur s7, [x10, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -11160,7 +12059,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", 
- "add x22, x28, x22, lsl #4", + "add x23, x28, x23, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -11176,7 +12075,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldur s6, [x10, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11208,7 +12111,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #32]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #32]", "ldur s6, [x10, #-4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11240,7 +12147,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #24]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #24]", "ldr s6, [x7]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11272,7 +12183,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #72]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x7, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11304,7 +12219,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #76]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #76]", "ldr s6, [x10]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11336,7 +12255,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #96]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #96]", "ldr s6, [x10, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11368,7 +12291,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #88]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, 
v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #88]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v3.16b", @@ -11384,7 +12311,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x11, #-8]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x11, #-8]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11408,7 +12339,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x11, #-4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x11, #-4]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11440,7 +12375,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x11]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x11]", "ldr s6, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -11472,7 +12411,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x11, #4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x11, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -11489,7 +12432,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x10, #-8]", + "fmov s5, w22", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "stur s6, [x10, #-8]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11513,7 +12460,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x10, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x10, #-4]", "ldr s3, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11545,7 +12496,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x10]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + 
"orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x10]", "ldr s3, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11577,7 +12532,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x10, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x10, #4]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11673,7 +12632,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x5, #-8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x5, #-8]", "ldr s5, [x8, #108]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11721,7 +12684,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x5, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x5, #-4]", "ldr s3, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11809,7 +12776,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x5]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x5]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v4.16b", @@ -11849,7 +12820,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x5, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x5, #4]", "ldr s3, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -11946,7 +12921,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x7, #-8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x7, #-8]", "ldr s5, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -11994,7 +12973,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x7, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr 
v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x7, #-4]", "ldr s3, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12090,7 +13073,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x7]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x7]", "ldr s5, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12138,7 +13125,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #4]", "ldr s3, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12225,7 +13216,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #20]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #20]", "ldr s5, [x9, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12259,7 +13254,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #28]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #28]", "ldr s5, [x9]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12292,7 +13291,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #68]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #68]", "ldr s5, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12328,7 +13331,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #80]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #80]", "ldr s5, [x9]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12361,7 +13368,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #84]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + 
"bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #84]", "ldr s5, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12395,7 +13406,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #92]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #92]", "ldr s5, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12451,7 +13466,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldr s6, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12483,7 +13502,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #32]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #32]", "ldr s6, [x9, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12515,7 +13538,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #24]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #24]", "ldr s6, [x9]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12547,7 +13574,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #72]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x6, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12579,7 +13610,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #76]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #76]", "ldr s6, [x9]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12611,7 +13646,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #96]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, 
v7.8b", + "str s8, [x8, #96]", "ldr s6, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12644,7 +13683,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #88]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #88]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v3.16b", @@ -12660,7 +13703,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #8]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #8]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12684,7 +13731,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #12]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #12]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12716,7 +13767,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9]", "ldr s6, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -12748,7 +13803,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #4]", "ldr w9, [x8, #112]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", @@ -12766,7 +13825,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #8]", + "fmov s5, w22", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x9, #8]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12790,7 +13853,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #12]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, 
v4.8b", + "str s5, [x9, #12]", "ldr s3, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12822,7 +13889,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9]", "ldr s3, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -12854,7 +13925,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #4]", "ldr w9, [x8, #48]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", @@ -12951,7 +14026,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x9, #8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x9, #8]", "ldr s5, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -12999,7 +14078,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #12]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #12]", "ldr s3, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13087,7 +14170,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x9]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x9]", "sub w9, w9, #0x10 (16)", "ldr s5, [x8, #52]", "str x30, [sp, #-16]!", @@ -13134,7 +14221,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #20]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #20]", "ldr w9, [x8, #112]", "sub w9, w9, #0x10 (16)", "str w9, [x8, #112]", @@ -13190,9 +14281,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "cset x14, hs", + "cset x15, hs", "subs w26, w9, #0x1 (1)", - "rmif x14, #63, #nzCv", + "rmif x15, #63, #nzCv", 
"mov x27, x9", "mov x9, x26", "ldr s5, [x8, #44]", @@ -13243,7 +14334,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x6, #24]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x6, #24]", "ldr s5, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -13291,7 +14386,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x6, #28]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x6, #28]", "ldr s3, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -13387,7 +14486,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x6, #16]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x6, #16]", "ldr s5, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -13435,13 +14538,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x6, #20]", + "fmov s8, w22", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x6, #20]", "strb w20, [x28, #1051]", "str q2, [x21, #1056]", - "str q6, [x22, #1056]", - "str q5, [x13, #1056]", - "str q4, [x12, #1056]", - "str q3, [x23, #1056]", + "str q6, [x23, #1056]", + "str q5, [x14, #1056]", + "str q4, [x13, #1056]", + "str q3, [x12, #1056]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", "lsl w22, w22, w20", @@ -13456,7 +14563,7 @@ }, "Block5": { "x86InstructionCount": 346, - "ExpectedInstructionCount": 2804, + "ExpectedInstructionCount": 3057, "x86Insts": [ "mov ebp,dword [esp + 0x64]", "fadd dword [ebp + 0x8]", @@ -13851,7 +14958,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #108]", + "mov w22, #0x400000", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #108]", "ldr s2, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -13899,7 +15011,11 @@ 
"blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #104]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #104]", "ldr s2, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -13947,7 +15063,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #48]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #48]", "ldr s2, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -13995,7 +15115,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #44]", + "fmov s3, w22", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #44]", "ldur s2, [x9, #-8]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -14018,9 +15142,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x23, x28, x22, lsl #4", + "add w23, w20, #0x7 (7)", + "and w23, w23, #0x7", + "add x12, x28, x23, lsl #4", "ldr q4, [x28, #3472]", "eor v3.16b, v3.16b, v4.16b", "str x30, [sp, #-16]!", @@ -14030,7 +15154,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x8, #60]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x8, #60]", "ldur s3, [x5, #-8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14063,8 +15191,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w22, w22, #0x7 (7)", - "and w22, w22, #0x7", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", "ldur s5, [x5, #-4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14073,7 +15201,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add x12, x28, x22, lsl #4", + "add x13, x28, x23, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "mov v1.16b, v5.16b", @@ -14090,8 +15218,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w22, w22, #0x7 (7)", - 
"and w22, w22, #0x7", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", "ldur s6, [x5, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14100,7 +15228,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add x13, x28, x22, lsl #4", + "add x14, x28, x23, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v6.16b", @@ -14116,7 +15244,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #24]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #24]", "ldur s5, [x11, #-4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14148,7 +15280,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #28]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #28]", "ldr s5, [x11]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14180,7 +15316,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #68]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #68]", "ldr s5, [x11, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14212,7 +15352,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #76]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #76]", "ldr s5, [x11]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14244,7 +15388,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #88]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #88]", "ldr s5, [x11, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14276,7 +15424,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #92]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #92]", "ldur s5, 
[x7, #-8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14309,8 +15461,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w22, w22, #0x7 (7)", - "and w22, w22, #0x7", + "add w23, w23, #0x7 (7)", + "and w23, w23, #0x7", "ldur s7, [x10, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -14319,7 +15471,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add x22, x28, x22, lsl #4", + "add x23, x28, x23, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -14335,7 +15487,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldur s6, [x10, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14367,7 +15523,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #32]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #32]", "ldur s6, [x10, #-4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14399,7 +15559,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", "ldr s6, [x7]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14431,7 +15595,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #72]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x7, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14463,7 +15631,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #80]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #80]", "ldr s6, [x10]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14495,7 +15667,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - 
"str s6, [x8, #96]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #96]", "ldr s6, [x10, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14527,7 +15703,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #84]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #84]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v3.16b", @@ -14543,7 +15723,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x11, #-8]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x11, #-8]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14567,7 +15751,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x11, #-4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x11, #-4]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14599,7 +15787,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x11]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x11]", "ldr s6, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -14631,7 +15823,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x11, #4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x11, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -14648,7 +15844,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x10, #-8]", + "fmov s5, w22", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "stur s6, [x10, #-8]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14672,7 +15872,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s3, s0", - "stur s3, [x10, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x10, #-4]", "ldr s3, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14704,7 +15908,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x10]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x10]", "ldr s3, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14736,7 +15944,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x10, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x10, #4]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14832,7 +16044,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "stur s5, [x5, #-8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x5, #-8]", "ldr s5, [x8, #108]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -14880,7 +16096,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x5, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x5, #-4]", "ldr s3, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -14968,7 +16188,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x5]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x5]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v4.16b", @@ -15008,7 +16232,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x5, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x5, #4]", "ldr s3, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15105,7 +16333,11 @@ "blr x0", "ldr x30, [sp], #16", 
"fmov s5, s0", - "stur s5, [x7, #-8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "stur s7, [x7, #-8]", "ldr s5, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15153,7 +16385,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "stur s3, [x7, #-4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "stur s5, [x7, #-4]", "ldr s3, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15249,7 +16485,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x7]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x7]", "ldr s5, [x8, #56]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15297,7 +16537,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x7, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x7, #4]", "ldr s3, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15383,7 +16627,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #24]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #24]", "ldr s5, [x9, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15417,7 +16665,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #28]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #28]", "ldr s5, [x9]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15451,7 +16703,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #68]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #68]", "ldr s5, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15485,7 +16741,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - 
"str s5, [x8, #76]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #76]", "ldr s5, [x9]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15519,7 +16779,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #88]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #88]", "ldr s5, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15553,7 +16817,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x8, #92]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x8, #92]", "ldr s5, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -15609,7 +16877,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #16]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #16]", "ldr s6, [x9, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15641,7 +16913,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #32]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #32]", "ldr s6, [x9, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15673,7 +16949,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", "ldr s6, [x6]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15705,7 +16985,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #72]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #72]", "ldr s6, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15737,7 +17021,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, 
#80]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #80]", "ldr s6, [x9]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15769,7 +17057,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #96]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #96]", "ldr s6, [x9, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15802,7 +17094,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #84]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #84]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v3.16b", @@ -15818,7 +17114,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #8]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #8]", "ldr s6, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15842,7 +17142,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #12]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #12]", "ldr s6, [x8, #72]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15874,7 +17178,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9]", "ldr s6, [x8, #80]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -15906,7 +17214,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x9, #4]", + "fmov s7, w22", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x9, #4]", "ldr w9, [x8, #112]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", @@ -15924,7 +17236,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, 
#8]", + "fmov s5, w22", + "fcmeq v6.4s, v3.4s, v3.4s", + "orr v5.8b, v3.8b, v5.8b", + "bsl v6.8b, v3.8b, v5.8b", + "str s6, [x9, #8]", "ldr s3, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15948,7 +17264,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #12]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #12]", "ldr s3, [x8, #68]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -15980,7 +17300,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9]", "ldr s3, [x8, #76]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16012,7 +17336,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #4]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #4]", "ldr w9, [x8, #40]", "ldr s3, [x8, #24]", "str x30, [sp, #-16]!", @@ -16109,7 +17437,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x9, #8]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x9, #8]", "ldr s5, [x8, #104]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -16157,7 +17489,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #12]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #12]", "ldr s3, [x8, #88]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16245,7 +17581,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x9]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x9]", "ldr s5, [x8, #52]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -16290,7 +17630,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x9, #20]", + "fmov s4, 
w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x9, #20]", "ldr w9, [x8, #112]", "sub w9, w9, #0x10 (16)", "str w9, [x8, #112]", @@ -16356,9 +17700,9 @@ "ldr x30, [sp], #16", "mov v5.16b, v0.16b", "subs w6, w6, #0x10 (16)", - "cset x14, hs", + "cset x15, hs", "subs w26, w9, #0x1 (1)", - "rmif x14, #63, #nzCv", + "rmif x15, #63, #nzCv", "mov x27, x9", "mov x9, x26", "str x30, [sp, #-16]!", @@ -16401,7 +17745,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x6, #24]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x6, #24]", "ldr s5, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -16449,7 +17797,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s3, s0", - "str s3, [x6, #28]", + "fmov s4, w22", + "fcmeq v5.4s, v3.4s, v3.4s", + "orr v4.8b, v3.8b, v4.8b", + "bsl v5.8b, v3.8b, v4.8b", + "str s5, [x6, #28]", "ldr s3, [x8, #84]", "str x30, [sp, #-16]!", "fmov s0, s3", @@ -16545,7 +17897,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s5, s0", - "str s5, [x6, #16]", + "fmov s6, w22", + "fcmeq v7.4s, v5.4s, v5.4s", + "orr v6.8b, v5.8b, v6.8b", + "bsl v7.8b, v5.8b, v6.8b", + "str s7, [x6, #16]", "ldr s5, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s5", @@ -16593,13 +17949,17 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x6, #20]", + "fmov s8, w22", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x6, #20]", "strb w20, [x28, #1051]", "str q2, [x21, #1056]", - "str q6, [x22, #1056]", - "str q5, [x13, #1056]", - "str q4, [x12, #1056]", - "str q3, [x23, #1056]", + "str q6, [x23, #1056]", + "str q5, [x14, #1056]", + "str q4, [x13, #1056]", + "str q3, [x12, #1056]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", "lsl w22, w22, w20", @@ -16614,7 +17974,7 @@ }, "Block6": { "x86InstructionCount": 409, - "ExpectedInstructionCount": 2204, + "ExpectedInstructionCount": 2309, 
"x86Insts": [ "mov eax,dword [ebp + 0x10]", "fld dword [eax + 0x30]", @@ -17162,7 +18522,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-48]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-48]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -17298,7 +18663,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-44]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -17434,7 +18803,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-40]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -17570,7 +18943,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-36]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -17706,7 +19083,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -17842,7 +19223,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -17978,7 +19363,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, 
v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -18114,7 +19503,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -18250,7 +19643,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -18386,7 +19783,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -19156,25 +20557,65 @@ "ldr x30, [sp], #16", "mov v7.16b, v0.16b", "ldur s8, [x9, #-48]", - "str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldur s8, [x9, #-44]", - "str s8, [x8, #60]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #60]", "ldur s8, [x9, #-40]", - "str s8, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #56]", "ldur s8, [x9, #-36]", - "str s8, [x8, #52]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #52]", "ldur s8, [x9, #-32]", - "str s8, [x8, #48]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str 
s10, [x8, #48]", "ldur s8, [x9, #-28]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldur s8, [x9, #-24]", - "str s8, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #40]", "ldur s8, [x9, #-20]", - "str s8, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #36]", "ldur s8, [x9, #-16]", - "str s8, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #32]", "ldur s8, [x9, #-12]", - "str s8, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #28]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -19183,7 +20624,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #24]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -19192,7 +20637,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v8.8b, v2.8b, v3.8b", + "str s8, [x8, #20]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -19201,7 +20650,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "ldr x0, [x28, #1624]", @@ -19209,7 +20662,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq 
v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -19217,7 +20674,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -19225,7 +20686,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #4]", "str w6, [x8]", "mov w20, #0x462", "movk w20, #0x1, lsl #16", @@ -19235,7 +20700,7 @@ }, "Block7": { "x86InstructionCount": 418, - "ExpectedInstructionCount": 2211, + "ExpectedInstructionCount": 2316, "x86Insts": [ "push ebp", "mov ebp,esp", @@ -19799,7 +21264,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-48]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-48]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -19935,7 +21405,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-44]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-44]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -20071,7 +21545,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-40]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-40]", "ldr w4, [x9, #16]", "ldr s2, [x4, #48]", "str x30, [sp, #-16]!", @@ -20207,7 +21685,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-36]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, 
v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-36]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -20343,7 +21825,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-32]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-32]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -20479,7 +21965,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-28]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-28]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -20615,7 +22105,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-24]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-24]", "ldr w4, [x9, #16]", "ldr s2, [x4, #32]", "str x30, [sp, #-16]!", @@ -20751,7 +22245,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-20]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-20]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -20887,7 +22385,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-16]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -21023,7 +22525,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-12]", "ldr w4, [x9, #16]", "ldr s2, [x4, #16]", "str x30, [sp, #-16]!", @@ -21793,25 +23299,65 @@ "ldr x30, [sp], #16", "mov v7.16b, v0.16b", "ldur s8, [x9, #-48]", - 
"str s8, [x8, #64]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #64]", "ldur s8, [x9, #-44]", - "str s8, [x8, #60]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #60]", "ldur s8, [x9, #-40]", - "str s8, [x8, #56]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #56]", "ldur s8, [x9, #-36]", - "str s8, [x8, #52]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #52]", "ldur s8, [x9, #-32]", - "str s8, [x8, #48]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #48]", "ldur s8, [x9, #-28]", - "str s8, [x8, #44]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #44]", "ldur s8, [x9, #-24]", - "str s8, [x8, #40]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #40]", "ldur s8, [x9, #-20]", - "str s8, [x8, #36]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #36]", "ldur s8, [x9, #-16]", - "str s8, [x8, #32]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #32]", "ldur s8, [x9, #-12]", - "str s8, [x8, #28]", + "fmov s9, w20", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x8, #28]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", @@ -21820,7 +23366,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #24]", + "fmov s8, w20", + "fcmeq v9.4s, v2.4s, v2.4s", + "orr v8.8b, 
v2.8b, v8.8b", + "bsl v9.8b, v2.8b, v8.8b", + "str s9, [x8, #24]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -21829,7 +23379,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #20]", + "fmov s3, w20", + "fcmeq v8.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v8.8b, v2.8b, v3.8b", + "str s8, [x8, #20]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -21838,7 +23392,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #16]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #16]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "ldr x0, [x28, #1624]", @@ -21846,7 +23404,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #12]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -21854,7 +23416,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #8]", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -21862,7 +23428,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x8, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x8, #4]", "str w6, [x8]", "mov w20, #0x46f", "movk w20, #0x1, lsl #16", @@ -21872,7 +23442,7 @@ }, "Block8": { "x86InstructionCount": 231, - "ExpectedInstructionCount": 1963, + "ExpectedInstructionCount": 2128, "x86Insts": [ "fadd dword [esp + 0x40]", "lea edx,[ecx + ecx*0x2]", @@ -22239,7 +23809,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #44]", + "mov w13, #0x400000", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, 
v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #44]", "ldr s4, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -22287,9 +23862,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #40]", - "add w13, w4, w10, lsl #2", - "ldur s4, [x13, #-8]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #40]", + "add w14, w4, w10, lsl #2", + "ldur s4, [x14, #-8]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -22297,8 +23876,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w13, w4, w6, lsl #2", - "ldr s5, [x13]", + "add w14, w4, w6, lsl #2", + "ldr s5, [x14]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -22314,8 +23893,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w13, w4, w7, lsl #2", - "ldur s5, [x13, #-4]", + "add w14, w4, w7, lsl #2", + "ldur s5, [x14, #-4]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -22325,11 +23904,11 @@ "mov v5.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add x13, x28, x22, lsl #4", + "add x14, x28, x22, lsl #4", "ldr q6, [x28, #3472]", "eor v5.16b, v5.16b, v6.16b", - "add w14, w4, w10, lsl #2", - "ldur s6, [x14, #-4]", + "add w15, w4, w10, lsl #2", + "ldur s6, [x15, #-4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22345,8 +23924,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w14, w4, w6, lsl #2", - "ldr s6, [x14]", + "add w15, w4, w6, lsl #2", + "ldr s6, [x15]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22356,8 +23935,8 @@ "mov v6.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add w14, w4, w10, lsl #2", - "ldur s7, [x14, #-8]", + "add w15, w4, w10, lsl #2", + "ldur s7, [x15, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22365,7 +23944,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add x14, x28, x22, lsl 
#4", + "add x15, x28, x22, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -22381,9 +23960,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", - "add w15, w4, w10, lsl #2", - "ldur s6, [x15, #-4]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", + "add w16, w4, w10, lsl #2", + "ldur s6, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22391,8 +23974,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldur s7, [x15, #-4]", + "add w16, w4, w7, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22415,9 +23998,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w5, lsl #2", - "ldur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w5, lsl #2", + "ldur s6, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22425,8 +24012,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldur s7, [x15, #-8]", + "add w16, w4, w11, lsl #2", + "ldur s7, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22442,8 +24029,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-4]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22453,8 +24040,8 @@ "mov v7.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, #-4]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -22478,9 +24065,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, 
[x8, #16]", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22488,8 +24079,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, #-8]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -22512,9 +24103,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22522,8 +24117,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, #-4]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -22546,7 +24141,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #24]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #24]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -22555,7 +24154,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w6, lsl #2", + "add w16, w4, w6, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -22563,7 +24162,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -22580,7 
+24183,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -22588,7 +24191,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "stur s7, [x15, #-4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "stur s9, [x16, #-4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -22598,7 +24205,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -22606,7 +24213,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-8]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "stur s7, [x16, #-8]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -22623,7 +24234,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -22631,7 +24242,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -22704,7 +24319,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -22712,7 +24327,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x16, #-8]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v5.16b", @@ 
-22737,7 +24356,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -22745,7 +24364,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -22834,7 +24457,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -22842,7 +24465,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x16, #-8]", "ldr s6, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -22883,7 +24510,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -22891,9 +24518,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", - "add w15, w4, w7, lsl #2", - "ldr s4, [x15]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", + "add w16, w4, w7, lsl #2", + "ldr s4, [x16]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -22901,8 +24532,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s5, [x15]", + "add w16, w4, w10, lsl #2", + "ldr s5, [x16]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -22918,8 +24549,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s5, [x15, #4]", + "add w16, w4, w7, lsl #2", + "ldr s5, 
[x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -22929,8 +24560,8 @@ "mov v5.16b, v0.16b", "ldr q6, [x28, #3472]", "eor v5.16b, v5.16b, v6.16b", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #4]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22946,8 +24577,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s6, [x15]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22955,8 +24586,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -22979,9 +24610,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #4]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -22989,8 +24624,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s7, [x15, #4]", + "add w16, w4, w7, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23013,9 +24648,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w11, lsl #2", - "ldr s6, [x15]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w11, lsl #2", + "ldr s6, [x16]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23023,8 +24662,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15]", + "add w16, w4, w5, lsl #2", 
+ "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23040,8 +24679,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #4]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23049,8 +24688,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #4]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23073,9 +24712,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23083,8 +24726,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23107,9 +24750,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23117,8 +24764,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #4]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23141,7 +24788,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #24]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, 
v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #24]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -23150,7 +24801,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -23158,7 +24809,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -23175,7 +24830,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -23183,7 +24838,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -23193,7 +24852,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -23201,7 +24860,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x16]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -23218,7 +24881,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -23226,7 +24889,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, 
v4.8b, v5.8b", + "str s6, [x16, #4]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -23299,7 +24966,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -23307,7 +24974,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x15]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x16]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v4.16b", @@ -23332,7 +25003,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -23340,7 +25011,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #4]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -23415,7 +25090,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -23423,7 +25098,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -23441,7 +25120,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -23449,9 +25128,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", - "add w15, w4, w7, lsl #2", - "ldr s4, [x15, #8]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, 
v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #4]", + "add w16, w4, w7, lsl #2", + "ldr s4, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -23459,8 +25142,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s5, [x15, #8]", + "add w16, w4, w10, lsl #2", + "ldr s5, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -23476,8 +25159,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s5, [x15, #12]", + "add w16, w4, w7, lsl #2", + "ldr s5, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -23487,8 +25170,8 @@ "mov v5.16b, v0.16b", "ldr q6, [x28, #3472]", "eor v5.16b, v5.16b, v6.16b", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #12]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23504,8 +25187,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s6, [x15, #8]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23513,8 +25196,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15, #8]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23537,9 +25220,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #20]", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #12]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #20]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23547,8 +25234,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s7, [x15, #12]", + "add w16, w4, w7, lsl #2", + 
"ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23571,9 +25258,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w11, lsl #2", - "ldr s6, [x15, #8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w11, lsl #2", + "ldr s6, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -23581,8 +25272,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #8]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23598,8 +25289,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #12]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23607,8 +25298,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #12]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23631,9 +25322,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23641,8 +25336,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #8]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23665,9 +25360,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, 
#12]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -23675,8 +25374,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #12]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -23699,7 +25398,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #24]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #24]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -23708,7 +25411,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -23716,7 +25419,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #8]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -23733,7 +25440,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -23741,7 +25448,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #12]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #12]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -23751,7 +25462,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ 
-23759,7 +25470,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #8]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x16, #8]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -23776,7 +25491,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -23784,7 +25499,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #12]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #12]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -23857,7 +25576,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -23865,7 +25584,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x15, #8]", + "fmov s9, w13", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x16, #8]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -23893,7 +25616,7 @@ "ldr x30, [sp], #16", "mov v2.16b, v0.16b", "strb wzr, [x28, #1049]", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -23901,7 +25624,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x15, #12]", + "fmov s3, w13", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x16, #12]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", "ldr s2, [x8, #20]", @@ -23992,7 +25719,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, 
[x28, #1624]", @@ -24000,7 +25727,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #8]", + "fmov s5, w13", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v8.8b, v4.8b, v5.8b", + "str s8, [x16, #8]", "ldr s4, [x8, #40]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -24041,7 +25772,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -24049,7 +25780,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x15, #12]", + "fmov s9, w13", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x16, #12]", "ldp w11, w10, [x8], #8", "ldp w9, w6, [x8], #8", "mvn w27, w8", @@ -24058,8 +25793,8 @@ "mov x8, x26", "strb w20, [x28, #1051]", "str q7, [x22, #1056]", - "str q6, [x14, #1056]", - "str q5, [x13, #1056]", + "str q6, [x15, #1056]", + "str q5, [x14, #1056]", "str q4, [x12, #1056]", "str q3, [x23, #1056]", "str q2, [x21, #1056]", @@ -24074,7 +25809,7 @@ }, "Block9": { "x86InstructionCount": 222, - "ExpectedInstructionCount": 1957, + "ExpectedInstructionCount": 2122, "x86Insts": [ "fadd dword [esp + 0x40]", "lea edx,[ecx + ecx*0x2]", @@ -24432,7 +26167,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #48]", + "mov w13, #0x400000", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #48]", "ldr s4, [x8, #60]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -24480,9 +26220,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x8, #44]", - "add w13, w4, w10, lsl #2", - "ldur s4, [x13, #-8]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x8, #44]", + "add w14, w4, w10, lsl #2", + "ldur s4, [x14, #-8]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -24490,8 
+26234,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w13, w4, w6, lsl #2", - "ldr s5, [x13]", + "add w14, w4, w6, lsl #2", + "ldr s5, [x14]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -24507,8 +26251,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w13, w4, w7, lsl #2", - "ldur s5, [x13, #-4]", + "add w14, w4, w7, lsl #2", + "ldur s5, [x14, #-4]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -24518,8 +26262,8 @@ "mov v5.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add w13, w4, w10, lsl #2", - "ldur s6, [x13, #-4]", + "add w14, w4, w10, lsl #2", + "ldur s6, [x14, #-4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -24527,7 +26271,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add x13, x28, x22, lsl #4", + "add x14, x28, x22, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v6.16b", @@ -24536,8 +26280,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w14, w4, w6, lsl #2", - "ldr s6, [x14]", + "add w15, w4, w6, lsl #2", + "ldr s6, [x15]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -24547,8 +26291,8 @@ "mov v6.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add w14, w4, w10, lsl #2", - "ldur s7, [x14, #-8]", + "add w15, w4, w10, lsl #2", + "ldur s7, [x15, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24556,7 +26300,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add x14, x28, x22, lsl #4", + "add x15, x28, x22, lsl #4", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v7.16b", @@ -24572,9 +26316,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #24]", - "add w15, w4, w7, lsl #2", - "ldur s6, [x15, #-4]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #24]", + "add w16, w4, w7, lsl #2", + "ldur s6, [x16, #-4]", "str 
x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -24582,8 +26330,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldur s7, [x15, #-4]", + "add w16, w4, w10, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24606,9 +26354,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w11, lsl #2", - "ldur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w11, lsl #2", + "ldur s6, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -24616,8 +26368,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-8]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24633,8 +26385,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-4]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24644,8 +26396,8 @@ "mov v7.16b, v0.16b", "add w22, w22, #0x7 (7)", "and w22, w22, #0x7", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, #-4]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -24669,9 +26421,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24679,8 +26435,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, 
#-8]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-8]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -24703,9 +26459,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldur s7, [x15, #-4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldur s7, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -24713,8 +26473,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldur s8, [x15, #-4]", + "add w16, w4, w11, lsl #2", + "ldur s8, [x16, #-4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -24737,7 +26497,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #20]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -24746,7 +26510,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w6, lsl #2", + "add w16, w4, w6, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -24754,7 +26518,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -24771,7 +26539,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -24779,7 +26547,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "stur s7, [x15, #-4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "stur s9, [x16, #-4]", "strb wzr, [x28, 
#1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -24789,7 +26561,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -24797,7 +26569,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-8]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "stur s7, [x16, #-8]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -24814,7 +26590,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -24822,7 +26598,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -24895,7 +26675,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -24903,7 +26683,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x16, #-8]", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "mov v1.16b, v5.16b", @@ -24928,7 +26712,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -24936,7 +26720,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", "ldr s4, 
[x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -25025,7 +26813,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -25033,7 +26821,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "stur s6, [x15, #-8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "stur s8, [x16, #-8]", "ldr s6, [x8, #48]", "str x30, [sp, #-16]!", "fmov s0, s6", @@ -25074,7 +26866,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25082,9 +26874,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "stur s4, [x15, #-4]", - "add w15, w4, w7, lsl #2", - "ldr s4, [x15]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "stur s6, [x16, #-4]", + "add w16, w4, w7, lsl #2", + "ldr s4, [x16]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -25092,8 +26888,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s5, [x15]", + "add w16, w4, w10, lsl #2", + "ldr s5, [x16]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -25109,8 +26905,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s5, [x15, #4]", + "add w16, w4, w7, lsl #2", + "ldr s5, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -25118,8 +26914,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #4]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25135,8 +26931,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s6, 
[x15]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25144,8 +26940,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25168,9 +26964,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #24]", - "add w15, w4, w7, lsl #2", - "ldr s6, [x15, #4]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #24]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25178,8 +26978,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15, #4]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25202,9 +27002,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w5, lsl #2", - "ldr s6, [x15]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w5, lsl #2", + "ldr s6, [x16]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25212,8 +27016,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s7, [x15]", + "add w16, w4, w11, lsl #2", + "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25229,8 +27033,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #4]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25238,8 +27042,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #4]", + 
"add w16, w4, w11, lsl #2", + "ldr s8, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -25262,9 +27066,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25272,8 +27080,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -25296,9 +27104,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25306,8 +27118,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #4]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #4]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -25330,7 +27142,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #20]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -25339,7 +27155,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -25347,7 +27163,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, 
v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -25364,7 +27184,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -25372,7 +27192,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #4]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #4]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -25382,7 +27206,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25390,7 +27214,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x16]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -25407,7 +27235,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25415,7 +27243,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #4]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -25488,7 +27320,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -25496,7 +27328,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x15]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, 
v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x16]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", "mov v1.16b, v4.16b", @@ -25521,7 +27357,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25529,7 +27365,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #4]", "ldr s4, [x8, #20]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -25604,7 +27444,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -25612,7 +27452,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v5.16b", @@ -25630,7 +27474,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25638,9 +27482,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #4]", - "add w15, w4, w7, lsl #2", - "ldr s4, [x15, #8]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #4]", + "add w16, w4, w7, lsl #2", + "ldr s4, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -25648,8 +27496,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s5, [x15, #8]", + "add w16, w4, w10, lsl #2", + "ldr s5, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -25665,8 +27513,8 @@ "blr x0", "ldr 
x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s5, [x15, #12]", + "add w16, w4, w7, lsl #2", + "ldr s5, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s5", "ldr x0, [x28, #1592]", @@ -25674,8 +27522,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s6, [x15, #12]", + "add w16, w4, w10, lsl #2", + "ldr s6, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25691,8 +27539,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v5.16b, v0.16b", - "add w15, w4, w7, lsl #2", - "ldr s6, [x15, #8]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25700,8 +27548,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15, #8]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25724,9 +27572,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #24]", - "add w15, w4, w7, lsl #2", - "ldr s6, [x15, #12]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #24]", + "add w16, w4, w7, lsl #2", + "ldr s6, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25734,8 +27586,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", - "ldr s7, [x15, #12]", + "add w16, w4, w10, lsl #2", + "ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25758,9 +27610,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s6, s0", - "str s6, [x8, #28]", - "add w15, w4, w11, lsl #2", - "ldr s6, [x15, #8]", + "fmov s7, w13", + "fcmeq v8.4s, v6.4s, v6.4s", + "orr v7.8b, v6.8b, v7.8b", + "bsl v8.8b, v6.8b, v7.8b", + "str s8, [x8, #28]", + "add w16, w4, w11, lsl #2", + "ldr s6, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s6", "ldr x0, [x28, #1592]", @@ -25768,8 
+27624,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #8]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25785,8 +27641,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s7, [x15, #12]", + "add w16, w4, w11, lsl #2", + "ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25794,8 +27650,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w5, lsl #2", - "ldr s8, [x15, #12]", + "add w16, w4, w5, lsl #2", + "ldr s8, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -25818,9 +27674,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #16]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #16]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25828,8 +27688,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #8]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #8]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, [x28, #1592]", @@ -25852,9 +27712,13 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #32]", - "add w15, w4, w5, lsl #2", - "ldr s7, [x15, #12]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #32]", + "add w16, w4, w5, lsl #2", + "ldr s7, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s7", "ldr x0, [x28, #1592]", @@ -25862,8 +27726,8 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w11, lsl #2", - "ldr s8, [x15, #12]", + "add w16, w4, w11, lsl #2", + "ldr s8, [x16, #12]", "str x30, [sp, #-16]!", "fmov s0, s8", "ldr x0, 
[x28, #1592]", @@ -25886,7 +27750,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x8, #20]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x8, #20]", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "mov v1.16b, v4.16b", @@ -25895,7 +27763,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -25903,7 +27771,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #8]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #8]", "ldr s7, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s7", @@ -25920,7 +27792,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v7.16b, v0.16b", - "add w15, w4, w7, lsl #2", + "add w16, w4, w7, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v7.16b", "ldr x0, [x28, #1624]", @@ -25928,7 +27800,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s7, s0", - "str s7, [x15, #12]", + "fmov s8, w13", + "fcmeq v9.4s, v7.4s, v7.4s", + "orr v8.8b, v7.8b, v8.8b", + "bsl v9.8b, v7.8b, v8.8b", + "str s9, [x16, #12]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", @@ -25938,7 +27814,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -25946,7 +27822,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #8]", + "fmov s6, w13", + "fcmeq v7.4s, v4.4s, v4.4s", + "orr v6.8b, v4.8b, v6.8b", + "bsl v7.8b, v4.8b, v6.8b", + "str s7, [x16, #8]", "ldr s4, [x8, #16]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -25963,7 +27843,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w5, lsl #2", + "add w16, w4, w5, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, 
#1624]", @@ -25971,7 +27851,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #12]", + "fmov s5, w13", + "fcmeq v6.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v6.8b, v4.8b, v5.8b", + "str s6, [x16, #12]", "ldr s4, [x8, #24]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -26044,7 +27928,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v6.16b, v0.16b", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v6.16b", "ldr x0, [x28, #1624]", @@ -26052,7 +27936,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x15, #8]", + "fmov s9, w13", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x16, #8]", "strb wzr, [x28, #1049]", "str x30, [sp, #-16]!", "mov v0.16b, v3.16b", @@ -26080,7 +27968,7 @@ "ldr x30, [sp], #16", "mov v2.16b, v0.16b", "strb wzr, [x28, #1049]", - "add w15, w4, w10, lsl #2", + "add w16, w4, w10, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v2.16b", "ldr x0, [x28, #1624]", @@ -26088,7 +27976,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x15, #12]", + "fmov s3, w13", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x16, #12]", "add w20, w20, #0x1 (1)", "and w20, w20, #0x7", "ldr s2, [x8, #20]", @@ -26179,7 +28071,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, v4.16b", "ldr x0, [x28, #1624]", @@ -26187,7 +28079,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s4, s0", - "str s4, [x15, #8]", + "fmov s5, w13", + "fcmeq v8.4s, v4.4s, v4.4s", + "orr v5.8b, v4.8b, v5.8b", + "bsl v8.8b, v4.8b, v5.8b", + "str s8, [x16, #8]", "ldr s4, [x8, #44]", "str x30, [sp, #-16]!", "fmov s0, s4", @@ -26228,7 +28124,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v4.16b, v0.16b", - "add w15, w4, w11, lsl #2", + "add w16, w4, w11, lsl #2", "str x30, [sp, #-16]!", "mov v0.16b, 
v4.16b", "ldr x0, [x28, #1624]", @@ -26236,7 +28132,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s8, s0", - "str s8, [x15, #12]", + "fmov s9, w13", + "fcmeq v10.4s, v8.4s, v8.4s", + "orr v9.8b, v8.8b, v9.8b", + "bsl v10.8b, v8.8b, v9.8b", + "str s10, [x16, #12]", "ldp w11, w10, [x8], #8", "ldp w9, w6, [x8], #8", "mvn w27, w8", @@ -26245,8 +28145,8 @@ "mov x8, x26", "strb w20, [x28, #1051]", "str q7, [x22, #1056]", - "str q6, [x14, #1056]", - "str q5, [x13, #1056]", + "str q6, [x15, #1056]", + "str q5, [x14, #1056]", "str q4, [x12, #1056]", "str q3, [x23, #1056]", "str q2, [x21, #1056]", @@ -26261,7 +28161,7 @@ }, "Block10": { "x86InstructionCount": 420, - "ExpectedInstructionCount": 1954, + "ExpectedInstructionCount": 2131, "x86Insts": [ "push ebp", "mov ebp,esp", @@ -26723,7 +28623,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x7c (124)", "ldr s2, [x4]", @@ -26759,7 +28664,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x78 (120)", "ldr w5, [x9, #8]", @@ -26797,7 +28706,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x7c (124)", "ldr w5, [x9, #8]", @@ -26835,7 +28748,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w5, w4, #0x38 (56)", "ldur w4, [x9, #-8]", @@ -26879,7 +28796,11 @@ "blr x0", "ldr x30, [sp], 
#16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x74 (116)", "ldr s2, [x4]", @@ -26915,7 +28836,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x70 (112)", "ldr w5, [x9, #8]", @@ -26953,7 +28878,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x74 (116)", "ldr w5, [x9, #8]", @@ -26991,7 +28920,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x30 (48)", "ldur s2, [x9, #-8]", @@ -27002,9 +28935,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w20, #0x3140", - "movk w20, #0x855, lsl #16", - "ldr s3, [x20]", + "mov w21, #0x3140", + "movk w21, #0x855, lsl #16", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27028,9 +28961,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "mov w21, #0x3144", - "movk w21, #0x855, lsl #16", - "ldr s4, [x21]", + "mov w22, #0x3144", + "movk w22, #0x855, lsl #16", + "ldr s4, [x22]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -27061,7 +28994,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x34 (52)", "ldur s2, [x9, #-8]", @@ -27072,9 +29009,9 @@ "blr x0", "ldr x30, 
[sp], #16", "mov v2.16b, v0.16b", - "mov w22, #0x3148", - "movk w22, #0x855, lsl #16", - "ldr s3, [x22]", + "mov w23, #0x3148", + "movk w23, #0x855, lsl #16", + "ldr s3, [x23]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27098,7 +29035,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x20]", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -27129,7 +29066,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x68 (104)", "ldr s2, [x4]", @@ -27165,7 +29106,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x6c (108)", "ldr s2, [x4]", @@ -27201,7 +29146,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x68 (104)", "ldr w5, [x9, #8]", @@ -27239,7 +29188,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x6c (108)", "ldr w5, [x9, #8]", @@ -27277,7 +29230,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x28 (40)", "ldur s2, [x9, #-8]", @@ -27304,9 +29261,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "mov w23, #0x313c", - "movk w23, #0x855, lsl #16", - "ldr s3, [x23]", + 
"mov w12, #0x313c", + "movk w12, #0x855, lsl #16", + "ldr s3, [x12]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27329,7 +29286,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x2c (44)", "ldur s2, [x9, #-8]", @@ -27356,7 +29317,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x23]", + "ldr s3, [x12]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27379,7 +29340,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x60 (96)", "ldr s2, [x4]", @@ -27415,7 +29380,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x64 (100)", "ldr s2, [x4]", @@ -27451,7 +29420,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x60 (96)", "ldr w5, [x9, #8]", @@ -27489,7 +29462,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x64 (100)", "ldr w5, [x9, #8]", @@ -27527,7 +29504,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x20 (32)", 
"ldur s2, [x9, #-8]", @@ -27538,7 +29519,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x22]", + "ldr s3, [x23]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27562,9 +29543,9 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "mov w12, #0x314c", - "movk w12, #0x855, lsl #16", - "ldr s4, [x12]", + "mov w13, #0x314c", + "movk w13, #0x855, lsl #16", + "ldr s4, [x13]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -27595,7 +29576,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x24 (36)", "ldur s2, [x9, #-4]", @@ -27606,7 +29591,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x22]", + "ldr s3, [x23]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -27630,7 +29615,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x20]", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -27661,7 +29646,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x58 (88)", "ldr s2, [x4]", @@ -27697,7 +29686,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x1c (28)", "ldr s2, [x4]", @@ -27733,7 +29726,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x58 (88)", "ldr w5, [x9, #8]", @@ -27771,7 
+29768,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x5c (92)", "ldr w5, [x9, #8]", @@ -27809,7 +29810,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w5, w4, #0x18 (24)", "ldur w4, [x9, #-4]", @@ -27853,7 +29858,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x14 (20)", "ldr s2, [x4]", @@ -27889,7 +29898,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x50 (80)", "ldr w5, [x9, #8]", @@ -27927,7 +29940,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x54 (84)", "ldr w5, [x9, #8]", @@ -27965,7 +29982,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x10 (16)", "ldur s2, [x9, #-4]", @@ -27976,7 +29997,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x20]", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28000,7 +30021,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x22]", + "ldr s4, [x23]", "str 
x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -28031,7 +30052,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x14 (20)", "ldur s2, [x9, #-4]", @@ -28042,7 +30067,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x22]", + "ldr s3, [x23]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28066,7 +30091,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x12]", + "ldr s4, [x13]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -28097,7 +30122,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x8 (8)", "ldr s2, [x4]", @@ -28133,7 +30162,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0xc (12)", "ldr s2, [x4]", @@ -28169,7 +30202,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w4, w4, #0x48 (72)", "ldr w5, [x9, #8]", @@ -28207,7 +30244,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x4c (76)", "ldr w5, [x9, #8]", @@ -28245,7 +30286,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, 
v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0x8 (8)", "ldur s2, [x9, #-4]", @@ -28272,7 +30317,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x23]", + "ldr s3, [x12]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28295,7 +30340,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "add w4, w4, #0xc (12)", "ldur s2, [x9, #-4]", @@ -28322,7 +30371,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x23]", + "ldr s3, [x12]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28345,7 +30394,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "ldr s2, [x4]", "str x30, [sp, #-16]!", @@ -28380,7 +30433,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-8]", "ldr w4, [x9, #8]", "add w4, w4, #0x4 (4)", "ldr s2, [x4]", @@ -28416,7 +30473,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "stur s2, [x9, #-4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x9, #-4]", "ldr w4, [x9, #8]", "add w5, w4, #0x40 (64)", "ldr w4, [x9, #8]", @@ -28453,7 +30514,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x5]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5]", "ldr w4, [x9, #8]", "add w4, w4, #0x44 (68)", "ldr w5, [x9, #8]", @@ -28491,7 +30556,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, 
w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldur s2, [x9, #-4]", "str x30, [sp, #-16]!", "fmov s0, s2", @@ -28500,7 +30569,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x22]", + "ldr s3, [x23]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28524,7 +30593,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x20]", + "ldr s4, [x21]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -28556,7 +30625,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "adds w26, w4, #0x4 (4)", "mov x27, x4", @@ -28569,7 +30642,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v2.16b, v0.16b", - "ldr s3, [x20]", + "ldr s3, [x21]", "str x30, [sp, #-16]!", "fmov s0, s3", "ldr x0, [x28, #1592]", @@ -28593,7 +30666,7 @@ "blr x0", "ldr x30, [sp], #16", "mov v3.16b, v0.16b", - "ldr s4, [x21]", + "ldr s4, [x22]", "str x30, [sp, #-16]!", "fmov s0, s4", "ldr x0, [x28, #1592]", @@ -28624,7 +30697,11 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "ldr w4, [x9, #8]", "str w4, [x8]", "mov w20, #0x47c", diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index 11067252f9..ee2a098111 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -2001,7 +2001,7 @@ ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": [ "0xd9 !11b /2" ], @@ -2016,11 +2016,16 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]" + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, 
v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": [ "0xd9 !11b /3" ], @@ -2035,7 +2040,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "mov w21, #0x400000", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "add w21, w20, #0x1 (1)", "and w21, w21, #0x7", "strb w21, [x28, #1051]", @@ -6736,7 +6746,7 @@ ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": [ "0xdd !11b /2" ], @@ -6751,11 +6761,16 @@ "blr x0", "ldr x30, [sp], #16", "fmov d2, d0", - "str d2, [x4]" + "mov x20, #0x8000000000000", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x4]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": [ "0xdd !11b /3" ], @@ -6770,7 +6785,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov d2, d0", - "str d2, [x4]", + "mov x21, #0x8000000000000", + "fmov d3, x21", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x4]", "add w21, w20, #0x1 (1)", "and w21, w21, #0x7", "strb w21, [x28, #1051]", @@ -10885,7 +10905,7 @@ }, "memcpy4_32": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 33, "x86Insts": [ "fld dword [rax]", "fstp dword [rdx]", @@ -10898,13 +10918,30 @@ ], "ExpectedArm64ASM": [ "ldr s2, [x4]", - "str s2, [x5]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5]", "ldr s2, [x4, #4]", - "str s2, [x5, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #4]", "ldr s2, [x4, #8]", - "str s2, [x5, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", 
+ "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #8]", "ldr s2, [x4, #12]", - "str s2, [x5, #12]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #12]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", @@ -10917,7 +10954,7 @@ }, "memcpy4_64": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 33, "x86Insts": [ "fld qword [rax]", "fstp qword [rdx]", @@ -10930,13 +10967,30 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x4]", - "str d2, [x5]", + "mov x20, #0x8000000000000", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5]", "ldr d2, [x4, #8]", - "str d2, [x5, #8]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #8]", "ldr d2, [x4, #16]", - "str d2, [x5, #16]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #16]", "ldr d2, [x4, #32]", - "str d2, [x5, #32]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #32]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index 01c897a6d8..6691c5a542 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -2000,7 +2000,7 @@ ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": [ "0xd9 !11b /2" ], @@ -2015,11 +2015,16 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]" + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, 
"Comment": [ "0xd9 !11b /3" ], @@ -2034,7 +2039,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov s2, s0", - "str s2, [x4]", + "mov w21, #0x400000", + "fmov s3, w21", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4]", "add w21, w20, #0x1 (1)", "and w21, w21, #0x7", "strb w21, [x28, #1051]", @@ -6767,7 +6777,7 @@ ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": [ "0xdd !11b /2" ], @@ -6782,11 +6792,16 @@ "blr x0", "ldr x30, [sp], #16", "fmov d2, d0", - "str d2, [x4]" + "mov x20, #0x8000000000000", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x4]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": [ "0xdd !11b /3" ], @@ -6801,7 +6816,12 @@ "blr x0", "ldr x30, [sp], #16", "fmov d2, d0", - "str d2, [x4]", + "mov x21, #0x8000000000000", + "fmov d3, x21", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x4]", "add w21, w20, #0x1 (1)", "and w21, w21, #0x7", "strb w21, [x28, #1051]", @@ -10852,7 +10872,7 @@ }, "memcpy4_32": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 33, "x86Insts": [ "fld dword [rax]", "fstp dword [rdx]", @@ -10865,13 +10885,30 @@ ], "ExpectedArm64ASM": [ "ldr s2, [x4]", - "str s2, [x5]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5]", "ldr s2, [x4, #4]", - "str s2, [x5, #4]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #4]", "ldr s2, [x4, #8]", - "str s2, [x5, #8]", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #8]", "ldr s2, [x4, #12]", - "str s2, [x5, #12]", + "fmov s3, w20", + "fcmeq 
v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x5, #12]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", @@ -10884,7 +10921,7 @@ }, "memcpy4_64": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 33, "x86Insts": [ "fld qword [rax]", "fstp qword [rdx]", @@ -10897,13 +10934,30 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x4]", - "str d2, [x5]", + "mov x20, #0x8000000000000", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5]", "ldr d2, [x4, #8]", - "str d2, [x5, #8]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #8]", "ldr d2, [x4, #16]", - "str d2, [x5, #16]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #16]", "ldr d2, [x4, #32]", - "str d2, [x5, #32]", + "fmov d3, x20", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x5, #32]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", @@ -10916,7 +10970,7 @@ }, "Multiple fld/fst": { "x86InstructionCount": 4, - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 31, "x86Insts": [ "fld qword [ebp+16380]", "fstp qword [eax-0x4]", @@ -10930,14 +10984,23 @@ "ldr d2, [x20]", "sub x20, x4, #0x4 (4)", "mov w20, w20", - "str d2, [x20]", + "mov x21, #0x8000000000000", + "fmov d3, x21", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x20]", "sub x20, x9, #0x8 (8)", "mov w20, w20", "ldr d2, [x20]", "mov w20, #0x3ff2", "add x20, x4, x20", "mov w20, w20", - "str d2, [x20]", + "fmov d3, x21", + "fcmeq d4, d2, d2", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str d4, [x20]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", diff --git a/unittests/InstructionCountCI/x87_32Bit.json 
b/unittests/InstructionCountCI/x87_32Bit.json index 088db96e49..7527d940db 100644 --- a/unittests/InstructionCountCI/x87_32Bit.json +++ b/unittests/InstructionCountCI/x87_32Bit.json @@ -14,7 +14,7 @@ "Instructions": { "Multiple fld/fst": { "x86InstructionCount": 4, - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 22, "x86Insts": [ "fld dword [ebp+16380]", "fstp dword [eax-0x4]", @@ -23,10 +23,19 @@ ], "ExpectedArm64ASM": [ "ldr s2, [x9, #16380]", - "stur s2, [x4, #-4]", + "mov w20, #0x400000", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "stur s4, [x4, #-4]", "ldur s2, [x9, #-8]", - "mov w20, #0x3ff2", - "str s2, [x4, x20, sxtx]", + "mov w21, #0x3ff2", + "fmov s3, w20", + "fcmeq v4.4s, v2.4s, v2.4s", + "orr v3.8b, v2.8b, v3.8b", + "bsl v4.8b, v2.8b, v3.8b", + "str s4, [x4, x21, sxtx]", "ldrb w20, [x28, #1051]", "ldrb w21, [x28, #1202]", "mov w22, #0x1", diff --git a/unittests/gcc-target-tests-32/Disabled_Tests b/unittests/gcc-target-tests-32/Disabled_Tests index c88e68c020..b82f45ec11 100644 --- a/unittests/gcc-target-tests-32/Disabled_Tests +++ b/unittests/gcc-target-tests-32/Disabled_Tests @@ -46,4 +46,10 @@ sse4_1-round-vec.c.gcc-target-test-32.n500.gcc-target-32 # This has a race with SIGPROF mcount_pic.c.gcc-target-test-32.n1.gcc-target-32 -mcount_pic.c.gcc-target-test-32.n500.gcc-target-32 \ No newline at end of file +mcount_pic.c.gcc-target-test-32.n500.gcc-target-32 + +# Miscompilation issues +# Loads integer with fld rather than fild +# Only triggers issue in n500 due to memcpy fastpath optimization but still disabling both versions. 
+pr88240.c.gcc-target-test-32.n1.gcc-target-32 +pr88240.c.gcc-target-test-32.n500.gcc-target-32 \ No newline at end of file diff --git a/unittests/gcc-target-tests-64/Disabled_Tests b/unittests/gcc-target-tests-64/Disabled_Tests index ef651afa51..55ec0bf8fa 100644 --- a/unittests/gcc-target-tests-64/Disabled_Tests +++ b/unittests/gcc-target-tests-64/Disabled_Tests @@ -2,3 +2,9 @@ # Crashes or hangs depending on which runner is running it. # Also this has a race with SIGPROF mcount_pic.c.gcc-target-test-64 + +# Miscompilation issues +# Loads integer with fld rather than fild +# Only triggers issue in n500 due to memcpy fastpath optimization but still disabling both versions. +pr88240.c.gcc-target-test-64.n1.gcc-target-64 +pr88240.c.gcc-target-test-64.n500.gcc-target-64 \ No newline at end of file