diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h
index 6ff8109fc8..4a992663b8 100644
--- a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h
+++ b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h
@@ -117,7 +117,7 @@ class Arm64Emitter : public ARMEmitter::Emitter {
     // Choose to pad or not depending on if code-caching is enabled.
     AUTOPAD,
   };
-  void LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, uint64_t Constant, PadType Pad = PadType::AUTOPAD, int MaxBytes = 0);
+  void LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, uint64_t Constant, PadType Pad, int MaxBytes = 0);
 
 protected:
   FEXCore::Context::ContextImpl* EmitterCTX;
diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp
index f361798a2b..cf71e32990 100644
--- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp
@@ -188,7 +188,7 @@ void Dispatcher::EmitDispatcher() {
   if (std::popcount(VirtualMemorySize) == 1) {
     and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), VirtualMemorySize - 1);
   } else {
-    LoadConstant(ARMEmitter::Size::i64Bit, TMP4, VirtualMemorySize);
+    LoadConstant(ARMEmitter::Size::i64Bit, TMP4, VirtualMemorySize, CPU::Arm64Emitter::PadType::NOPAD);
     and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), TMP4);
   }
 
@@ -261,7 +261,7 @@ void Dispatcher::EmitDispatcher() {
 
 #ifdef _M_ARM_64EC
   ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
-  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1);
+  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1, CPU::Arm64Emitter::PadType::NOPAD);
   strb(TMP1.W(), TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
 #endif
 
@@ -429,7 +429,7 @@ void Dispatcher::EmitDispatcher() {
       PopCalleeSavedRegisters();
       ret();
     } else {
-      LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, 0);
+      LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, 0, CPU::Arm64Emitter::PadType::NOPAD);
       ldr(ARMEmitter::XReg::x1, ARMEmitter::Reg::r1);
     }
   }
@@ -488,7 +488,7 @@ void Dispatcher::EmitDispatcher() {
 
     // Now push the callback return trampoline to the guest stack
     // Guest will be misaligned because calling a thunk won't correct the guest's stack once we call the callback from the host
-    LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, CTX->SignalDelegation->GetThunkCallbackRET());
+    LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, CTX->SignalDelegation->GetThunkCallbackRET(), CPU::Arm64Emitter::PadType::NOPAD);
 
     ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, State.gregs[X86State::REG_RSP]));
     sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::r2, CTX->Config.Is64BitMode ? 16 : 12);
diff --git a/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp
index cf40738b90..2a9734400e 100644
--- a/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp
@@ -43,7 +43,8 @@ DEF_BINOP_WITH_CONSTANT(Ror, rorv, ror)
 DEF_OP(Constant) {
   auto Op = IROp->C();
   auto Dst = GetReg(Node);
-  LoadConstant(ARMEmitter::Size::i64Bit, Dst, Op->Constant);
+  // TODO: Audit the frontend generating these constants and pass through padding information.
+  LoadConstant(ARMEmitter::Size::i64Bit, Dst, Op->Constant, CPU::Arm64Emitter::PadType::AUTOPAD);
 }
 
 DEF_OP(EntrypointOffset) {
@@ -1289,7 +1290,7 @@ DEF_OP(MaskGenerateFromBitWidth) {
   auto Op = IROp->C();
   auto BitWidth = GetReg(Op->BitWidth);
 
-  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, -1);
+  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, -1, CPU::Arm64Emitter::PadType::NOPAD);
   cmp(ARMEmitter::Size::i64Bit, BitWidth, 0);
   lslv(ARMEmitter::Size::i64Bit, TMP2, TMP1, BitWidth);
   csinv(ARMEmitter::Size::i64Bit, GetReg(Node), TMP1, TMP2, ARMEmitter::Condition::CC_EQ);
diff --git a/FEXCore/Source/Interface/Core/JIT/BranchOps.cpp b/FEXCore/Source/Interface/Core/JIT/BranchOps.cpp
index 7c6a937cf2..aa0e49537b 100644
--- a/FEXCore/Source/Interface/Core/JIT/BranchOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/BranchOps.cpp
@@ -271,7 +271,7 @@ DEF_OP(Syscall) {
   // Still without overwriting registers that matter
   // 16bit LoadConstant to be a single instruction
   // This gives the signal handler a value to check to see if we are in a syscall at all
-  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GPRSpillMask & 0xFFFF);
+  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GPRSpillMask & 0xFFFF, CPU::Arm64Emitter::PadType::NOPAD);
   str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));
 
   uint64_t SPOffset = AlignUp(FEXCore::HLE::SyscallArguments::MAX_ARGS * 8, 16);
@@ -362,29 +362,29 @@ DEF_OP(ValidateCode) {
 
   EmitCheck(8, [&]() {
     ldr(TMP1, Base, Offset);
-    LoadConstant(ARMEmitter::Size::i64Bit, TMP2, *(const uint64_t*)(OldCode + Offset));
+    LoadConstant(ARMEmitter::Size::i64Bit, TMP2, *(const uint64_t*)(OldCode + Offset), CPU::Arm64Emitter::PadType::NOPAD);
   });
 
   EmitCheck(4, [&]() {
     ldr(TMP1.W(), Base, Offset);
-    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint32_t*)(OldCode + Offset));
+    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint32_t*)(OldCode + Offset), CPU::Arm64Emitter::PadType::NOPAD);
   });
 
   EmitCheck(2, [&]() {
     ldrh(TMP1.W(), Base, Offset);
-    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint16_t*)(OldCode + Offset));
+    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint16_t*)(OldCode + Offset), CPU::Arm64Emitter::PadType::NOPAD);
   });
 
   EmitCheck(1, [&]() {
     ldrb(TMP1.W(), Base, Offset);
-    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint8_t*)(OldCode + Offset));
+    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint8_t*)(OldCode + Offset), CPU::Arm64Emitter::PadType::NOPAD);
   });
 
   ARMEmitter::ForwardLabel End;
-  LoadConstant(ARMEmitter::Size::i32Bit, Dst, 0);
+  LoadConstant(ARMEmitter::Size::i32Bit, Dst, 0, CPU::Arm64Emitter::PadType::NOPAD);
   b_OrRestart(&End);
   BindOrRestart(&Fail);
-  LoadConstant(ARMEmitter::Size::i32Bit, Dst, 1);
+  LoadConstant(ARMEmitter::Size::i32Bit, Dst, 1, CPU::Arm64Emitter::PadType::NOPAD);
   BindOrRestart(&End);
 }
 
@@ -397,7 +397,8 @@ DEF_OP(ThreadRemoveCodeEntry) {
 
   // X1: RIP
   mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, STATE.R());
-  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Entry);
+  // TODO: Relocations don't seem to be wired up to this...?
+  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Entry, CPU::Arm64Emitter::PadType::AUTOPAD);
   ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.ThreadRemoveCodeEntryFromJIT));
 
   if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
diff --git a/FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp b/FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp
index b1c0a4bcc3..170cf8a05b 100644
--- a/FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp
@@ -135,7 +135,7 @@ DEF_OP(VAESKeyGenAssist) {
 
   if (Op->RCON) {
     tbl(Dst.Q(), Dst.Q(), Swizzle.Q());
-    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->RCON) << 32);
+    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->RCON) << 32, CPU::Arm64Emitter::PadType::NOPAD);
     dup(ARMEmitter::SubRegSize::i64Bit, VTMP2.Q(), TMP1);
     eor(Dst.Q(), Dst.Q(), VTMP2.Q());
   } else {
diff --git a/FEXCore/Source/Interface/Core/JIT/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/JIT.cpp
index 88aa2fb4a5..48e8853f78 100644
--- a/FEXCore/Source/Interface/Core/JIT/JIT.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/JIT.cpp
@@ -758,14 +758,14 @@ void Arm64JITCore::EmitTFCheck() {
   uint64_t Constant {};
   memcpy(&Constant, &State, sizeof(State));
 
-  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Constant);
+  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Constant, CPU::Arm64Emitter::PadType::NOPAD);
   str(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, SynchronousFaultData));
   ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.GuestSignal_SIGTRAP));
   br(TMP1);
 
   (void)Bind(&l_TFBlocked);
   // If TF was blocked for this instruction, unblock it for the next.
-  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 0b11);
+  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 0b11, CPU::Arm64Emitter::PadType::NOPAD);
   strb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));
   (void)Bind(&l_TFUnset);
 }
@@ -805,7 +805,7 @@ void Arm64JITCore::EmitEntryPoint(ARMEmitter::BackwardLabel& HeaderLabel, bool C
     if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
       sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
     } else {
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize, CPU::Arm64Emitter::PadType::NOPAD);
       sub(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::rsp, ARMEmitter::XReg::rsp, TMP1, ARMEmitter::ExtendedType::LSL_64, 0);
     }
   }
@@ -1163,7 +1163,7 @@ void Arm64JITCore::ResetStack() {
     add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
   } else {
     // Too big to fit in a 12bit immediate
-    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
+    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize, CPU::Arm64Emitter::PadType::NOPAD);
     add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::rsp, ARMEmitter::XReg::rsp, TMP1, ARMEmitter::ExtendedType::LSL_64, 0);
   }
 }
diff --git a/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
index 17861a03f4..088514fbfd 100644
--- a/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
@@ -378,7 +378,7 @@ DEF_OP(SpillRegister) {
     switch (OpSize) {
     case IR::OpSize::i8Bit: {
       if (SlotOffset > LSByteMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         strb(Src, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         strb(Src, ARMEmitter::Reg::rsp, SlotOffset);
@@ -387,7 +387,7 @@ DEF_OP(SpillRegister) {
     }
     case IR::OpSize::i16Bit: {
       if (SlotOffset > LSHalfMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         strh(Src, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         strh(Src, ARMEmitter::Reg::rsp, SlotOffset);
@@ -396,7 +396,7 @@ DEF_OP(SpillRegister) {
     }
     case IR::OpSize::i32Bit: {
       if (SlotOffset > LSWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         str(Src.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         str(Src.W(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -405,7 +405,7 @@ DEF_OP(SpillRegister) {
     }
     case IR::OpSize::i64Bit: {
       if (SlotOffset > LSDWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         str(Src.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         str(Src.X(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -420,7 +420,7 @@ DEF_OP(SpillRegister) {
     switch (OpSize) {
    case IR::OpSize::i32Bit: {
       if (SlotOffset > LSWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         str(Src.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         str(Src.S(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -429,7 +429,7 @@ DEF_OP(SpillRegister) {
     }
     case IR::OpSize::i64Bit: {
       if (SlotOffset > LSDWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         str(Src.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         str(Src.D(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -438,7 +438,7 @@ DEF_OP(SpillRegister) {
     }
     case IR::OpSize::i128Bit: {
       if (SlotOffset > LSQWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         str(Src.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         str(Src.Q(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -467,7 +467,7 @@ DEF_OP(FillRegister) {
     switch (OpSize) {
     case IR::OpSize::i8Bit: {
       if (SlotOffset > LSByteMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldrb(Dst, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldrb(Dst, ARMEmitter::Reg::rsp, SlotOffset);
@@ -476,7 +476,7 @@ DEF_OP(FillRegister) {
     }
     case IR::OpSize::i16Bit: {
       if (SlotOffset > LSHalfMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldrh(Dst, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldrh(Dst, ARMEmitter::Reg::rsp, SlotOffset);
@@ -485,7 +485,7 @@ DEF_OP(FillRegister) {
     }
     case IR::OpSize::i32Bit: {
       if (SlotOffset > LSWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldr(Dst.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldr(Dst.W(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -494,7 +494,7 @@ DEF_OP(FillRegister) {
     }
     case IR::OpSize::i64Bit: {
       if (SlotOffset > LSDWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldr(Dst.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldr(Dst.X(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -509,7 +509,7 @@ DEF_OP(FillRegister) {
     switch (OpSize) {
     case IR::OpSize::i32Bit: {
       if (SlotOffset > LSWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldr(Dst.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldr(Dst.S(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -518,7 +518,7 @@ DEF_OP(FillRegister) {
     }
     case IR::OpSize::i64Bit: {
       if (SlotOffset > LSDWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldr(Dst.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldr(Dst.D(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -527,7 +527,7 @@ DEF_OP(FillRegister) {
     }
     case IR::OpSize::i128Bit: {
       if (SlotOffset > LSQWordMaxUnsignedOffset) {
-        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset);
+        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset, CPU::Arm64Emitter::PadType::NOPAD);
         ldr(Dst.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
       } else {
         ldr(Dst.Q(), ARMEmitter::Reg::rsp, SlotOffset);
@@ -609,7 +609,7 @@ ARMEmitter::Register Arm64JITCore::ApplyMemOperand(IR::OpSize AccessSize, ARMEmi
       if (Const == 0) {
         return Base;
       }
-      LoadConstant(ARMEmitter::Size::i64Bit, Tmp, Const);
+      LoadConstant(ARMEmitter::Size::i64Bit, Tmp, Const, CPU::Arm64Emitter::PadType::NOPAD);
       add(ARMEmitter::Size::i64Bit, Tmp, Base, Tmp, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(OffsetScale));
     } else {
       auto RegOffset = GetReg(Offset);
@@ -1213,7 +1213,7 @@ DEF_OP(VLoadVectorGatherMasked) {
       AddrReg = GetReg(Op->AddrBase);
     } else {
       ///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
-      LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
+      LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0, CPU::Arm64Emitter::PadType::NOPAD);
     }
     MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
   }
@@ -1299,7 +1299,7 @@ DEF_OP(VLoadVectorGatherMaskedQPS) {
       AddrReg = *BaseAddr;
     } else {
       ///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
-      LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
+      LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0, CPU::Arm64Emitter::PadType::NOPAD);
     }
     MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndex.Z(), ModType, SVEScale);
   }
diff --git a/FEXCore/Source/Interface/Core/JIT/MiscOps.cpp b/FEXCore/Source/Interface/Core/JIT/MiscOps.cpp
index bc41258d12..43d65e05fa 100644
--- a/FEXCore/Source/Interface/Core/JIT/MiscOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/MiscOps.cpp
@@ -73,7 +73,7 @@ DEF_OP(Break) {
   uint64_t Constant {};
   memcpy(&Constant, &State, sizeof(State));
 
-  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Constant);
+  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Constant, CPU::Arm64Emitter::PadType::NOPAD);
   str(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CpuStateFrame, SynchronousFaultData));
 
   switch (Op->Reason.Signal) {
@@ -234,7 +234,7 @@ DEF_OP(ProcessorID) {
   // 16bit LoadConstant to be a single instruction
   // We must always spill at least one register (x8) so this value always has a bit set
   // This gives the signal handler a value to check to see if we are in a syscall at all
-  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, SpillMask & 0xFFFF);
+  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, SpillMask & 0xFFFF, CPU::Arm64Emitter::PadType::NOPAD);
   str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));
 
   // Allocate some temporary space for storing the uint32_t CPU and Node IDs
@@ -247,7 +247,7 @@ DEF_OP(ProcessorID) {
 #else
   constexpr auto GetCPUSyscallNum = SYS_getcpu;
 #endif
-  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r8, GetCPUSyscallNum);
+  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r8, GetCPUSyscallNum, CPU::Arm64Emitter::PadType::NOPAD);
 
   // CPU pointer in x0
   add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, ARMEmitter::Reg::rsp, 0);
@@ -307,7 +307,7 @@ DEF_OP(MonoBackpatcherWrite) {
 
 #ifdef _M_ARM_64EC
   ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
-  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1);
+  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1, CPU::Arm64Emitter::PadType::NOPAD);
   strb(TMP1.W(), TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
 #endif
diff --git a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
index 9a2ef66e91..f8c9bc1b9e 100644
--- a/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
@@ -939,7 +939,7 @@ DEF_OP(VectorImm) {
     LOGMAN_THROW_A_FMT(Op->ShiftAmount == 0, "SVE VectorImm doesn't support a shift");
     if (ElementSize > IR::OpSize::i8Bit && (Op->Immediate & 0x80)) {
       // SVE dup uses sign extension where VectorImm wants zext
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Op->Immediate);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Op->Immediate, CPU::Arm64Emitter::PadType::NOPAD);
       dup(SubRegSize, Dst.Z(), TMP1);
     } else {
       dup_imm(SubRegSize, Dst.Z(), static_cast<int8_t>(Op->Immediate));
@@ -947,7 +947,7 @@ DEF_OP(VectorImm) {
   } else {
     if (ElementSize == IR::OpSize::i64Bit) {
       // movi with 64bit element size doesn't do what we want here
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->Immediate) << Op->ShiftAmount);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->Immediate) << Op->ShiftAmount, CPU::Arm64Emitter::PadType::NOPAD);
       dup(SubRegSize, Dst.Q(), TMP1.R());
     } else {
       movi(SubRegSize, Dst.Q(), Op->Immediate, Op->ShiftAmount);
@@ -2521,7 +2521,7 @@ DEF_OP(VUShl) {
       movi(SubRegSize, VTMP1.Q(), MaxShift);
       umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q());
     } else {
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift, CPU::Arm64Emitter::PadType::NOPAD);
       dup(SubRegSize, VTMP1.Q(), TMP1.R());
 
       // UMIN is silly on Adv.SIMD and doesn't have a variant that handles 64-bit elements
@@ -2577,7 +2577,7 @@ DEF_OP(VUShr) {
       movi(SubRegSize, VTMP1.Q(), MaxShift);
       umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q());
     } else {
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift, CPU::Arm64Emitter::PadType::NOPAD);
       dup(SubRegSize, VTMP1.Q(), TMP1.R());
 
       // UMIN is silly on Adv.SIMD and doesn't have a variant that handles 64-bit elements
@@ -2636,7 +2636,7 @@ DEF_OP(VSShr) {
      movi(SubRegSize, VTMP1.Q(), MaxShift);
       umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q());
     } else {
-      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift);
+      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, MaxShift, CPU::Arm64Emitter::PadType::NOPAD);
       dup(SubRegSize, VTMP1.Q(), TMP1.R());
 
       // UMIN is silly on Adv.SIMD and doesn't have a variant that handles 64-bit elements
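
Note on what the now-mandatory PadType argument controls: AUTOPAD pads a materialized constant out to its worst-case encoding size when code-caching is enabled, so a later pass (for example, relocation of the Entry address in ThreadRemoveCodeEntry above) can patch in a different value without resizing the emitted block, while NOPAD emits the shortest sequence. Below is a minimal standalone C++ sketch of that idea; it is not FEXCore's implementation. The Code/EmitInst helpers are hypothetical, and the real emitter also uses MOVN and ORR-immediate forms and honors the MaxBytes limit, none of which is modeled here.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical instruction buffer standing in for a real emitter.
enum class PadType { NOPAD, AUTOPAD };
std::vector<uint32_t> Code;

void EmitInst(uint32_t Inst) {
  Code.push_back(Inst);
}

// Materialize a 64-bit constant into X<Reg> with MOVZ/MOVK only.
void LoadConstant(uint32_t Reg, uint64_t Constant, PadType Pad) {
  constexpr uint32_t kNop = 0xD503201F; // AArch64 NOP encoding
  size_t Emitted = 0;

  // MOVZ Xd, #imm16: seed the register with the low 16 bits.
  const uint32_t Low = static_cast<uint32_t>(Constant & 0xFFFF);
  EmitInst(0xD2800000u | (Low << 5) | Reg);
  ++Emitted;

  // MOVK Xd, #imm16, LSL #(16*hw): patch in each remaining non-zero chunk.
  for (uint32_t Shift = 1; Shift < 4; ++Shift) {
    const uint32_t Chunk = static_cast<uint32_t>((Constant >> (Shift * 16)) & 0xFFFF);
    if (Chunk != 0) {
      EmitInst(0xF2800000u | (Shift << 21) | (Chunk << 5) | Reg);
      ++Emitted;
    }
  }

  // AUTOPAD: NOP-fill to the worst case of four instructions, so a later
  // pass can rewrite the constant in place without moving any code.
  if (Pad == PadType::AUTOPAD) {
    while (Emitted < 4) {
      EmitInst(kNop);
      ++Emitted;
    }
  }
}

int main() {
  LoadConstant(1, 0x1234, PadType::NOPAD);   // 1 word: single MOVZ
  LoadConstant(1, 0x1234, PadType::AUTOPAD); // 4 words: MOVZ + 3 NOPs
  std::printf("%zu instruction words emitted\n", Code.size());
  return 0;
}

Under these assumptions, the NOPAD call emits one MOVZ while the AUTOPAD call always occupies four instruction words, so patching the constant to any other 64-bit value never changes the code layout; that is the distinction the call sites in this patch are now forced to spell out.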