diff --git a/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp index 2a9734400e..071508f60c 100644 --- a/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/ALUOps.cpp @@ -43,8 +43,15 @@ DEF_BINOP_WITH_CONSTANT(Ror, rorv, ror) DEF_OP(Constant) { auto Op = IROp->C(); auto Dst = GetReg(Node); - // TODO: Audit the frontend generating these constants and pass through padding information. - LoadConstant(ARMEmitter::Size::i64Bit, Dst, Op->Constant, CPU::Arm64Emitter::PadType::AUTOPAD); + + const auto PadType = [Pad = Op->Pad]() { + switch (Pad) { + case IR::ConstPad::NoPad: return CPU::Arm64Emitter::PadType::NOPAD; + case IR::ConstPad::DoPad: return CPU::Arm64Emitter::PadType::DOPAD; + default: return CPU::Arm64Emitter::PadType::AUTOPAD; + } + }(); + LoadConstant(ARMEmitter::Size::i64Bit, Dst, Op->Constant, PadType, Op->MaxBytes); } DEF_OP(EntrypointOffset) { diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index d6f77c3251..dea7d4c630 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1367,7 +1367,7 @@ void OpDispatchBuilder::CPUIDOp(OpcodeArgs) { StoreGPRRegister(X86State::REG_RDX, RDX); } -uint32_t OpDispatchBuilder::LoadConstantShift(X86Tables::DecodedOp Op, bool Is1Bit) { +uint32_t OpDispatchBuilder::GetConstantShift(X86Tables::DecodedOp Op, bool Is1Bit) { if (Is1Bit) { return 1; } else { @@ -1402,7 +1402,7 @@ void OpDispatchBuilder::SHLOp(OpcodeArgs) { void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs, bool SHL1Bit) { Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - uint64_t Shift = LoadConstantShift(Op, SHL1Bit); + uint64_t Shift = GetConstantShift(Op, SHL1Bit); const auto Size = GetSrcBitSize(Op); Ref Result = _Lshl(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Constant(Shift)); @@ -1425,7 +1425,7 @@ void OpDispatchBuilder::SHRImmediateOp(OpcodeArgs, bool SHR1Bit) { const auto Size = GetSrcBitSize(Op); auto Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = Size >= 32}); - uint64_t Shift = LoadConstantShift(Op, SHR1Bit); + uint64_t Shift = GetConstantShift(Op, SHR1Bit); auto ALUOp = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Constant(Shift)); CalculateFlags_ShiftRightImmediate(OpSizeFromSrc(Op), ALUOp, Dest, Shift); @@ -1477,7 +1477,7 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) { } void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) { - uint64_t Shift = LoadConstantShift(Op, false); + uint64_t Shift = GetConstantShift(Op, false); const auto Size = GetSrcBitSize(Op); Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = Size >= 32}); @@ -1545,7 +1545,7 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) { Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags); Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags); - uint64_t Shift = LoadConstantShift(Op, false); + uint64_t Shift = GetConstantShift(Op, false); const auto Size = GetSrcBitSize(Op); if (Shift != 0) { @@ -1586,7 +1586,7 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs, bool Immediate, bool SHR1Bit) { } if (Immediate) { - uint64_t Shift = LoadConstantShift(Op, SHR1Bit); + uint64_t Shift = GetConstantShift(Op, SHR1Bit); Ref Result = _Ashr(OpSize, Dest, Constant(Shift)); CalculateFlags_SignShiftRightImmediate(OpSizeFromSrc(Op), Result, Dest, Shift); @@ -1614,7 +1614,7 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs, bool Left, bool IsImmediate, bool I ArithRef UnmaskedSrc; if (Is1Bit || IsImmediate) { - UnmaskedConst = LoadConstantShift(Op, Is1Bit); + UnmaskedConst = GetConstantShift(Op, Is1Bit); UnmaskedSrc = ARef(UnmaskedConst); } else { UnmaskedSrc = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true})); @@ -4148,14 +4148,14 @@ void 
OpDispatchBuilder::UpdatePrefixFromSegment(Ref Segment, uint32_t SegmentReg // In some cases the upper 16-bits of the 32-bit GPR contain garbage to ignore. auto GDT = _Bfe(OpSize::i32Bit, 1, 2, Segment); // Fun quirk, if we mask the selector then it is premultiplied by 8 which we need to do for accessing anyway. - auto SegmentOffset = _And(OpSize::i32Bit, Segment, _Constant(0xfff8)); + auto SegmentOffset = _And(OpSize::i32Bit, Segment, _Constant(0xfff8, ConstPad::NoPad)); Ref SegmentBase = _LoadContextGPRIndexed(GDT, OpSize::i64Bit, offsetof(FEXCore::Core::CPUState, segment_arrays[0]), 8); Ref NewSegment = _LoadMemGPR(OpSize::i64Bit, SegmentBase, SegmentOffset, OpSize::i8Bit, MemOffsetType::UXTW, 1); CheckLegacySegmentWrite(NewSegment, SegmentReg); // Extract the 32-bit base from the GDT segment. - auto Upper32 = _Lshr(OpSize::i64Bit, NewSegment, _Constant(32)); - auto Masked = _And(OpSize::i32Bit, Upper32, _Constant(0xFF00'0000)); + auto Upper32 = _Lshr(OpSize::i64Bit, NewSegment, _Constant(32, ConstPad::NoPad)); + auto Masked = _And(OpSize::i32Bit, Upper32, _Constant(0xFF00'0000, ConstPad::NoPad)); Ref Merged = _Orlshr(OpSize::i32Bit, Masked, NewSegment, 16); NewSegment = _Bfi(OpSize::i32Bit, 8, 16, Merged, Upper32); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index b1c25bebed..eaa3398dde 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -372,7 +372,7 @@ class OpDispatchBuilder final : public IREmitter { void CMOVOp(OpcodeArgs); void CPUIDOp(OpcodeArgs); void XGetBVOp(OpcodeArgs); - uint32_t LoadConstantShift(X86Tables::DecodedOp Op, bool Is1Bit); + uint32_t GetConstantShift(X86Tables::DecodedOp Op, bool Is1Bit); void SHLOp(OpcodeArgs); void SHLImmediateOp(OpcodeArgs, bool SHL1Bit); void SHROp(OpcodeArgs); @@ -1172,7 +1172,7 @@ class OpDispatchBuilder final : public IREmitter { auto Const = Header->C(); if (Const->Constant == 
IR::NamedVectorConstant::NAMED_VECTOR_ZERO) { - Ref Zero = _Constant(0); + Ref Zero = _Constant(0, ConstPad::NoPad); Ref STP = _StoreContextPair(IR::OpSize::i64Bit, RegClass::GPR, Zero, Zero, Offset); // XXX: This works around InlineConstant not having an associated diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index c9295cbefe..dc7474175f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -5156,7 +5156,7 @@ void OpDispatchBuilder::Extrq_imm(OpcodeArgs) { } const uint64_t Mask = ~0ULL >> (MaskWidth == 0 ? 0 : (64 - MaskWidth)); - const Ref MaskVector = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(Mask)); + const Ref MaskVector = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(Mask, ConstPad::NoPad)); Result = _VAnd(OpSize::i128Bit, OpSize::i64Bit, Result, MaskVector); StoreResultFPR(Op, Result); @@ -5170,7 +5170,7 @@ void OpDispatchBuilder::Insertq_imm(OpcodeArgs) { Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags); const uint64_t Mask = ~0ULL >> (MaskWidth == 0 ? 0 : (64 - MaskWidth)); - Ref MaskVector = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(Mask)); + Ref MaskVector = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(Mask, ConstPad::NoPad)); // Mask incoming source. 
Src = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Src, MaskVector); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index 5a39e6662e..55dc55707b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -769,7 +769,7 @@ void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) { void OpDispatchBuilder::FNCLEX(OpcodeArgs) { // Clear the exception flag bit - SetRFLAG(_Constant(0)); + SetRFLAG(_Constant(0, ConstPad::NoPad)); } void OpDispatchBuilder::FNINIT(OpcodeArgs) { diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index f15ea967a9..0960339c32 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -105,6 +105,11 @@ "PosInfinity = 2,", "TowardsZero = 3, /* Truncate */", "Host = 4," + ], + "class ConstPad : uint8_t": [ + "NoPad = 0,", + "DoPad = 1,", + "AutoPad = 2," ] }, "Defines": [ @@ -142,6 +147,7 @@ "MemOffsetType": "MemOffsetType", "BreakDefinition": "BreakDefinition", "RoundType": "RoundMode", + "ConstPad": "ConstPad", "FloatCompareOp": "FloatCompareOp", "NamedVectorConstant": "FEXCore::IR::NamedVectorConstant", "IndexNamedVectorConstant": "FEXCore::IR::IndexNamedVectorConstant", @@ -930,11 +936,15 @@ ] }, - "GPR = Constant i64:$Constant": { + "GPR = Constant i64:$Constant, ConstPad:$Pad{ConstPad::AutoPad}, i32:$MaxBytes{0}": { "Desc": ["Generates a 64bit constant inside of a GPR", "Unsupported to create a constant in FPR" ], - "DestSize": "OpSize::i64Bit" + "DestSize": "OpSize::i64Bit", + "EmitValidation": [ + "MaxBytes >= 0 && MaxBytes <= 8 && (MaxBytes & 1) == 0", + "MaxBytes == 0 || MaxBytes == 8 || (Constant >> (MaxBytes * 8)) == 0" + ] }, "InlineConstant i64:$Constant": { diff --git a/FEXCore/Source/Interface/IR/IRDumper.cpp b/FEXCore/Source/Interface/IR/IRDumper.cpp index e83eeab48d..3a60747821 100644 --- a/FEXCore/Source/Interface/IR/IRDumper.cpp +++ 
b/FEXCore/Source/Interface/IR/IRDumper.cpp @@ -149,6 +149,17 @@ static void PrintArg(fextl::stringstream* out, const IRListView*, RoundMode Arg) }(); } +static void PrintArg(fextl::stringstream* out, const IRListView*, ConstPad Arg) { + *out << [Arg] { + switch (Arg) { + case ConstPad::NoPad: return "NoPad"; + case ConstPad::DoPad: return "DoPad"; + case ConstPad::AutoPad: return "AutoPad"; + } + return ""; + }(); +} + static void PrintArg(fextl::stringstream* out, const IRListView*, NamedVectorConstant Arg) { *out << [Arg] { // clang-format off diff --git a/FEXCore/Source/Interface/IR/IREmitter.h b/FEXCore/Source/Interface/IR/IREmitter.h index bd2dbbd26b..42667d356c 100644 --- a/FEXCore/Source/Interface/IR/IREmitter.h +++ b/FEXCore/Source/Interface/IR/IREmitter.h @@ -109,10 +109,10 @@ class IREmitter { return _Jump(InvalidNode); } IRPair _CondJump(Ref ssa0, CondClass cond = CondClass::NEQ) { - return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, GetOpSize(ssa0)); + return _CondJump(ssa0, _Constant(0, ConstPad::NoPad), InvalidNode, InvalidNode, cond, GetOpSize(ssa0)); } IRPair _CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClass cond = CondClass::NEQ) { - return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, GetOpSize(ssa0)); + return _CondJump(ssa0, _Constant(0, ConstPad::NoPad), ssa1, ssa2, cond, GetOpSize(ssa0)); } IRPair _LoadContextGPR(OpSize ByteSize, uint32_t Offset) { @@ -239,22 +239,33 @@ class IREmitter { DEF_ADDSUB(AddWithFlags) DEF_ADDSUB(SubWithFlags) - int64_t Constants[32]; + struct ConstantData { + int64_t Value; + ConstPad Pad; + int32_t MaxBytes; + [[nodiscard]] auto operator<=>(const ConstantData&) const noexcept = default; + }; + ConstantData Constants[32]; Ref ConstantRefs[32]; uint32_t NrConstants; - Ref Constant(int64_t Value) { + Ref Constant(int64_t Value, ConstPad Pad = ConstPad::AutoPad, int32_t MaxBytes = 0) { + const ConstantData Data { + .Value = Value, + .Pad = Pad, + .MaxBytes = MaxBytes, + }; // Search for the constant 
in the pool. for (unsigned i = 0; i < std::min(NrConstants, 32u); ++i) { - if (Constants[i] == Value) { + if (Constants[i] == Data) { return ConstantRefs[i]; } } // Otherwise, materialize a fresh constant and pool it. - Ref R = _Constant(Value); + Ref R = _Constant(Value, Pad, MaxBytes); unsigned i = (NrConstants++) & 31; - Constants[i] = Value; + Constants[i] = Data; ConstantRefs[i] = R; return R; } diff --git a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp index 6ea6900089..d4afd84c37 100644 --- a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp @@ -94,8 +94,9 @@ class ConstrainedRAPass final : public RegisterAllocationPass { // Remat if we can if (Rematerializable(IROp)) { - uint64_t Const = IROp->C()->Constant; - return IREmit->_Constant(Const); + const auto Op = IROp->C(); + uint64_t Const = Op->Constant; + return IREmit->_Constant(Const, Op->Pad, Op->MaxBytes); } // Otherwise fill from stack @@ -513,8 +514,8 @@ bool ConstrainedRAPass::TryPostRAMerge(Ref LastNode, Ref CodeNode, IROp_Header* if (CPUID->DoesXCRFunctionReportConstantData(ConstantFunction)) { const auto Result = CPUID->RunXCRFunction(ConstantFunction); IREmit->SetWriteCursorBefore(CodeNode); - IREmit->_Constant(Result.eax).Node->Reg = PhysicalRegister(Op->OutEAX).Raw; - IREmit->_Constant(Result.edx).Node->Reg = PhysicalRegister(Op->OutEDX).Raw; + IREmit->_Constant(Result.eax, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutEAX).Raw; + IREmit->_Constant(Result.edx, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutEDX).Raw; IREmit->RemovePostRA(CodeNode); return false; } @@ -532,10 +533,10 @@ bool ConstrainedRAPass::TryPostRAMerge(Ref LastNode, Ref CodeNode, IROp_Header* IREmit->SetWriteCursorBefore(CodeNode); IREmit->_Fence(IR::FenceType::Inst); - IREmit->_Constant(Result.eax).Node->Reg = PhysicalRegister(Op->OutEAX).Raw; - 
IREmit->_Constant(Result.ebx).Node->Reg = PhysicalRegister(Op->OutEBX).Raw; - IREmit->_Constant(Result.ecx).Node->Reg = PhysicalRegister(Op->OutECX).Raw; - IREmit->_Constant(Result.edx).Node->Reg = PhysicalRegister(Op->OutEDX).Raw; + IREmit->_Constant(Result.eax, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutEAX).Raw; + IREmit->_Constant(Result.ebx, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutEBX).Raw; + IREmit->_Constant(Result.ecx, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutECX).Raw; + IREmit->_Constant(Result.edx, ConstPad::NoPad).Node->Reg = PhysicalRegister(Op->OutEDX).Raw; IREmit->RemovePostRA(CodeNode); return false; } diff --git a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp index 12649b6e00..3209cf9e1d 100644 --- a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp @@ -387,11 +387,11 @@ inline void X87StackOptimization::Reset() { inline Ref X87StackOptimization::GetConstant(ssize_t Offset) { if (Offset < 0 || Offset >= X87StackOptimization::ConstantPool.size()) { // not dealt by pool - return IREmit->_Constant(Offset); + return IREmit->_Constant(Offset, ConstPad::NoPad); } if (ConstantPool[Offset] == nullptr) { - ConstantPool[Offset] = IREmit->_Constant(Offset); + ConstantPool[Offset] = IREmit->_Constant(Offset, ConstPad::NoPad); } return ConstantPool[Offset]; }