Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CodeEmitter/CodeEmitter/Emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ enum class SystemRegister : uint32_t {
RNDRRS = GenSystemReg<0b11, 0b011, 0b0010, 0b0100, 0b001>,
NZCV = GenSystemReg<0b11, 0b011, 0b0100, 0b0010, 0b000>,
FPCR = GenSystemReg<0b11, 0b011, 0b0100, 0b0100, 0b000>,
FPSR = GenSystemReg<0b11, 0b011, 0b0100, 0b0100, 0b001>,
TPIDRRO_EL0 = GenSystemReg<0b11, 0b011, 0b1101, 0b0000, 0b011>,
CNTFRQ_EL0 = GenSystemReg<0b11, 0b011, 0b1110, 0b0000, 0b000>,
CNTVCT_EL0 = GenSystemReg<0b11, 0b011, 0b1110, 0b0000, 0b010>,
Expand Down
7 changes: 7 additions & 0 deletions FEXCore/Scripts/json_ir_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class OpDefinition:
NonSSAArgNum: int
DynamicDispatch: bool
LoweredX87: bool
SetsIOC: bool
JITDispatch: bool
JITDispatchOverride: str
TiedSource: int
Expand All @@ -80,6 +81,7 @@ def __init__(self):
self.NonSSAArgNum = 0
self.DynamicDispatch = False
self.LoweredX87 = False
self.SetsIOC = False
self.JITDispatch = True
self.JITDispatchOverride = None
self.TiedSource = -1
Expand Down Expand Up @@ -276,6 +278,9 @@ def parse_ops(ops):
assert("JITDispatch" not in op_val)
OpDef.JITDispatch = False

if "SetsIOC" in op_val:
OpDef.SetsIOC = op_val["SetsIOC"]

if "TiedSource" in op_val:
OpDef.TiedSource = op_val["TiedSource"]

Expand Down Expand Up @@ -413,6 +418,7 @@ def print_ir_sizes():
[[nodiscard, gnu::const]] bool ImplicitFlagClobber(IROps Op);
[[nodiscard, gnu::const]] bool GetHasDest(IROps Op);
[[nodiscard, gnu::const]] bool LoweredX87(IROps Op);
[[nodiscard, gnu::const, gnu::visibility("default")]] bool SetsIOC(IROps Op);
[[nodiscard, gnu::const]] int8_t TiedSource(IROps Op);

#undef IROP_SIZES
Expand Down Expand Up @@ -511,6 +517,7 @@ def print_ir_hassideeffects():
("HasSideEffects", "bool"),
("ImplicitFlagClobber", "bool"),
("LoweredX87", "bool"),
("SetsIOC", "bool"),
("TiedSource", "int8_t"),
]:
output_file.write(
Expand Down
32 changes: 32 additions & 0 deletions FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/SHMStats.h>

#include <cmath>
#include <limits>

namespace FEXCore::CPU {
FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW, bool Force80BitPrecision = false) {
softfloat_state State {};
Expand Down Expand Up @@ -342,6 +345,10 @@ template<>
struct OpHandlers<IR::OP_F64SIN> {
  /// Fallback handler for IR::OP_F64SIN.
  ///
  /// @param src   Operand as a double.
  /// @param Frame CPU state frame; its x87 invalid-operation flag is set when src is infinite.
  /// @return sin(src), or a quiet NaN when src is +/- infinity.
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    // Account for this call in the float-fallback profiling counter.
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);

    // Common path: any non-infinite operand goes straight to the math library.
    if (!std::isinf(src)) {
      return sin(src);
    }

    // Infinite operand: raise the invalid-operation flag and produce a quiet NaN.
    Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
    return std::numeric_limits<double>::quiet_NaN();
  }
};
Expand All @@ -350,6 +357,10 @@ template<>
struct OpHandlers<IR::OP_F64COS> {
  /// Fallback handler for IR::OP_F64COS.
  ///
  /// @param src   Operand as a double.
  /// @param Frame CPU state frame; its x87 invalid-operation flag is set when src is infinite.
  /// @return cos(src), or a quiet NaN when src is +/- infinity.
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    // Account for this call in the float-fallback profiling counter.
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);

    // Common path: any non-infinite operand goes straight to the math library.
    if (!std::isinf(src)) {
      return cos(src);
    }

    // Infinite operand: raise the invalid-operation flag and produce a quiet NaN.
    Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
    return std::numeric_limits<double>::quiet_NaN();
  }
};
Expand All @@ -363,6 +374,10 @@ struct OpHandlers<IR::OP_F64SINCOS> {
sin = ::sin(src);
cos = ::cos(src);
#else
if (std::isinf(src)) {
Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
return {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()};
}
sincos(src, &sin, &cos);
#endif
return VectorScalarF64Pair {sin, cos};
Expand All @@ -373,6 +388,10 @@ template<>
struct OpHandlers<IR::OP_F64TAN> {
  /// Fallback handler for IR::OP_F64TAN.
  ///
  /// @param src   Operand as a double.
  /// @param Frame CPU state frame; its x87 invalid-operation flag is set when src is infinite.
  /// @return tan(src), or a quiet NaN when src is +/- infinity.
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    // Account for this call in the float-fallback profiling counter.
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);

    // Common path: any non-infinite operand goes straight to the math library.
    if (!std::isinf(src)) {
      return tan(src);
    }

    // Infinite operand: raise the invalid-operation flag and produce a quiet NaN.
    Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
    return std::numeric_limits<double>::quiet_NaN();
  }
};
Expand All @@ -397,6 +416,13 @@ template<>
struct OpHandlers<IR::OP_F64FPREM> {
  /// Fallback handler for IR::OP_F64FPREM (truncating remainder via fmod).
  ///
  /// @param src1  Dividend.
  /// @param src2  Divisor.
  /// @param Frame CPU state frame; its x87 invalid-operation flag is set for invalid operand pairs.
  /// @return fmod(src1, src2), or a quiet NaN when the operation is invalid.
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    // Account for this call in the float-fallback profiling counter.
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);

    // A quiet NaN operand must propagate without raising invalid-operation, so the
    // infinite-dividend / zero-divisor check only applies when neither operand is a NaN.
    // NOTE(review): signaling NaNs are not distinguished here — confirm whether they can reach this handler.
    const bool HasNaNOperand = std::isnan(src1) || std::isnan(src2);
    if (!HasNaNOperand && (std::isinf(src1) || src2 == 0.0)) {
      // FPREM with an infinite dividend or a zero divisor is an invalid operation.
      Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
      return std::numeric_limits<double>::quiet_NaN();
    }

    // Valid operands, or a NaN operand that fmod propagates as-is.
    return fmod(src1, src2);
  }
};
Expand All @@ -405,6 +431,12 @@ template<>
struct OpHandlers<IR::OP_F64FPREM1> {
  /// Fallback handler for IR::OP_F64FPREM1 (round-to-nearest remainder via remainder()).
  ///
  /// @param src1  Dividend.
  /// @param src2  Divisor.
  /// @param Frame CPU state frame; its x87 invalid-operation flag is set for invalid operand pairs.
  /// @return remainder(src1, src2), or a quiet NaN when the operation is invalid.
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    // Account for this call in the float-fallback profiling counter.
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);

    // A quiet NaN operand must propagate without raising invalid-operation, so the
    // infinite-dividend / zero-divisor check only applies when neither operand is a NaN.
    // NOTE(review): signaling NaNs are not distinguished here — confirm whether they can reach this handler.
    const bool HasNaNOperand = std::isnan(src1) || std::isnan(src2);
    if (!HasNaNOperand && (std::isinf(src1) || src2 == 0.0)) {
      // FPREM1 with an infinite dividend or a zero divisor is an invalid operation.
      Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
      return std::numeric_limits<double>::quiet_NaN();
    }

    // Valid operands, or a NaN operand that remainder() propagates as-is.
    return remainder(src1, src2);
  }
};
Expand Down
11 changes: 11 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,12 @@ void Arm64JITCore::EmitEntryPoint(ARMEmitter::BackwardLabel& HeaderLabel, bool C
}
}

// Emits a read-modify-write sequence that clears bit 0 of the host FPSR
// (the IOC, invalid-operation cumulative flag). Uses TMP1 as scratch, so its
// previous contents are lost.
void Arm64JITCore::ClearFPSRIOC() {
// Read the current FPSR value into the scratch register.
mrs(TMP1, ARMEmitter::SystemRegister::FPSR);
// Clear bit 0, operating on the 32-bit form of the register.
bic(ARMEmitter::Size::i32Bit, TMP1, TMP1, 1);
// Write the updated value back to FPSR.
msr(ARMEmitter::SystemRegister::FPSR, TMP1);
}

CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uint64_t Size, bool SingleInst, const FEXCore::IR::IRListView* IR,
FEXCore::Core::DebugData* DebugData, bool CheckTF) {
FEXCORE_PROFILE_SCOPED("Arm64::CompileCode");
Expand Down Expand Up @@ -933,6 +939,11 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uint64_t Size
}

for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) {
// Clear FPSR IOC bit before non-x87 operations that can set it (only in reduced precision mode)
if (ReducedPrecisionMode && FEXCore::IR::SetsIOC(IROp->Op) && !FEXCore::IR::LoweredX87(IROp->Op)) {
ClearFPSRIOC();
}

switch (IROp->Op) {
#define REGISTER_OP_RT(op, x) \
case FEXCore::IR::IROps::OP_##op: std::invoke(RT_##x, this, IROp, CodeNode); break
Expand Down
3 changes: 3 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/JITClass.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

private:
FEX_CONFIG_OPT(ParanoidTSO, PARANOIDTSO);
FEX_CONFIG_OPT(ReducedPrecisionMode, X87REDUCEDPRECISION);

const bool HostSupportsSVE128 {};
const bool HostSupportsSVE256 {};
Expand Down Expand Up @@ -414,6 +415,8 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

void EmitEntryPoint(ARMEmitter::BackwardLabel& HeaderLabel, bool CheckTF);

void ClearFPSRIOC();

// Runtime selection;
// Load and store TSO memory style
OpType RT_LoadMemTSO;
Expand Down
13 changes: 13 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/MiscOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ DEF_OP(GetRoundingMode) {
bfi(ARMEmitter::Size::i64Bit, Dst, TMP1, 30, 2);
}

DEF_OP(ReadFPSR) {
  // Read the host FPSR system register straight into this node's destination register.
  mrs(GetReg(Node), ARMEmitter::SystemRegister::FPSR);
}

DEF_OP(WriteFPSR) {
  auto WriteOp = IROp->C<IR::IROp_WriteFPSR>();

  // Move the source operand's register (through its .X() form) into the host FPSR.
  msr(ARMEmitter::SystemRegister::FPSR, GetReg(WriteOp->FPSR).X());
}

DEF_OP(SetRoundingMode) {
auto Op = IROp->C<IR::IROp_SetRoundingMode>();
auto Src = GetReg(Op->RoundMode);
Expand Down
4 changes: 4 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,10 @@ void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) {
void OpDispatchBuilder::FNCLEX(OpcodeArgs) {
  // Reset the guest-visible x87 invalid-operation (IE) flag.
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(_Constant(0));

  // Also clear bit 0 of the host FPSR (the IOC bit on Arm) so it does not carry
  // over: read FPSR, mask out bit 0, and write the result back.
  Ref HostFPSR = _ReadFPSR();
  Ref Bit0Mask = _Constant(1);
  Ref ClearedFPSR = _Andn(OpSize::i64Bit, HostFPSR, Bit0Mask);
  _WriteFPSR(OpSize::i32Bit, ClearedFPSR);
}

void OpDispatchBuilder::FNINIT(OpcodeArgs) {
Expand Down
32 changes: 30 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,41 @@ void OpDispatchBuilder::FISTF64(OpcodeArgs, bool Truncate) {
const auto Size = OpSizeFromSrc(Op);

Ref data = _ReadStackValue(0);

// This operation is invalid if:
// * Operand is NaN
// * Operand is (+/-) infinity
// * Operand is too large for the destination type
//
// 16-bit stores need to be handled as a special case, since _Float_ToGPR_S/ZS
// do not handle 16-bit conversions; we convert to 32 bits instead.
// If the value is invalid for 32 bits, it is also invalid for 16 bits; in addition,
// we check the 16-bit bounds separately.
// The 32-bit conversion result is still correct for a 16-bit store whenever the value fits within 16 bits.
if (Truncate) {
data = _Float_ToGPR_ZS(Size == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
data = _Float_ToGPR_ZS(Size == OpSize::i16Bit ? OpSize::i32Bit : Size, OpSize::i64Bit, data);
} else {
data = _Float_ToGPR_S(Size == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
data = _Float_ToGPR_S(Size == OpSize::i16Bit ? OpSize::i32Bit : Size, OpSize::i64Bit, data);
}

StoreResult_WithOpSize(GPRClass, Op, Op->Dest, data, Size, OpSize::i8Bit);

// Additional check for 16-bit range overflow
if (Size == OpSize::i16Bit) {
// Use sign extension approach: if value fits in 16-bit signed range,
// then sign-extending it should give the same value
Ref SignExtended = _Sbfe(OpSize::i32Bit, 16, 0, data);

// Test if they're different (indicating overflow)
_SubNZCV(OpSize::i32Bit, data, SignExtended);

// Set invalid operation bit conditionally
Ref Bit = _NZCVSelect(OpSize::i32Bit, {COND_NEQ}, _Constant(1), _Constant(0));
CheckFPSRIOCAndSetIOBit(Bit);
} else {
CheckFPSRIOCAndSetIOBit();
}

if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) {
_PopStackDestroy();
}
Expand Down
Loading
Loading