Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions FEXCore/Source/Common/SoftFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,47 @@ struct FEX_PACKED X80SoftFloat {
return std::bit_cast<float>(Result);
}

// Returns true when this value is encoded as an x87 signaling NaN:
// exponent all ones, explicit integer bit (63) set, quiet bit (62) clear
// and at least one payload bit set in bits 61:0.
bool IsSignalingNaN() const {
  if (Exponent != 0x7FFF) {
    return false;
  }

  const bool IntegerBitSet = (Significand & 0x8000000000000000ULL) != 0;
  const bool QuietBitSet = (Significand & 0x4000000000000000ULL) != 0;
  // With the quiet bit clear, an all-zero payload is rejected here.
  const bool HasPayload = (Significand & 0x3FFFFFFFFFFFFFFFULL) != 0;

  return IntegerBitSet && !QuietBitSet && HasPayload;
}

// Returns true when this value is encoded as an x87 quiet NaN: exponent all
// ones with both the explicit integer bit (63) and the quiet bit (62) set.
// The remaining payload bits are not inspected.
bool IsQuietNaN() const {
  constexpr uint64_t IntegerAndQuietBits = 0xC000000000000000ULL;
  return Exponent == 0x7FFF && (Significand & IntegerAndQuietBits) == IntegerAndQuietBits;
}

// True for either NaN flavour (signaling or quiet) as classified by the
// two dedicated predicates.
bool IsNaN() const {
  if (IsSignalingNaN()) {
    return true;
  }
  return IsQuietNaN();
}

// Converts this x87 value to an IEEE-754 double while keeping the
// signaling/quiet distinction of NaNs.
//  - Signaling NaN: the double is assembled by hand so the quiet bit (51) stays clear.
//  - Quiet NaN: converted through softfloat, then the quiet bit is forced on.
//  - Anything else: plain softfloat conversion.
double ToF64_PreserveNaN(softfloat_state* state) const {
  if (!IsSignalingNaN()) {
    const bool ForceQuietBit = IsQuietNaN();
    uint64_t Converted = std::bit_cast<uint64_t>(extF80_to_f64(state, *this));
    if (ForceQuietBit) {
      Converted |= 0x0008000000000000ULL;
    }
    return std::bit_cast<double>(Converted);
  }

  // Signaling NaN: sign, all-ones exponent, and the top of the x87 payload.
  const uint64_t SignField = Sign ? 0x8000000000000000ULL : 0;
  const uint64_t ExponentField = 0x7FF0000000000000ULL;

  // x87 payload bits 61:11 become double fraction bits 50:0; the low 11 payload
  // bits are dropped and bit 51 (quiet) is left clear by the mask.
  uint64_t Fraction = (Significand >> 11) & 0x0007FFFFFFFFFFFFULL;
  if (Fraction == 0) {
    // An empty fraction would encode infinity, so keep one payload bit set.
    Fraction = 1;
  }

  return std::bit_cast<double>(SignField | ExponentField | Fraction);
}

double ToF64(softfloat_state* state) const {
const float64_t Result = extF80_to_f64(state, *this);
return std::bit_cast<double>(Result);
Expand Down Expand Up @@ -584,6 +625,39 @@ struct FEX_PACKED X80SoftFloat {
*this = f64_to_extF80(state, std::bit_cast<float64_t>(rhs));
}

// Builds an X80SoftFloat from a double. NaN inputs are translated bit-for-bit so
// that a signaling NaN stays signaling and a quiet NaN stays quiet; every other
// input goes through the regular (state, double) constructor.
static X80SoftFloat FromF64_PreserveNaN(softfloat_state* state, double value) {
  constexpr uint64_t ExponentMask = 0x7FF0000000000000ULL;
  constexpr uint64_t FractionMask = 0x000FFFFFFFFFFFFFULL;

  const uint64_t Raw = std::bit_cast<uint64_t>(value);
  const uint64_t Fraction = Raw & FractionMask;

  // NaN == all-ones exponent with a non-zero fraction.
  const bool InputIsNaN = (Raw & ExponentMask) == ExponentMask && Fraction != 0;
  if (!InputIsNaN) {
    return X80SoftFloat(state, value);
  }

  X80SoftFloat Converted;
  Converted.Sign = (Raw >> 63) & 1;
  Converted.Exponent = 0x7FFF;

  // Bit 63 is the explicit integer bit x87 requires. Shifting the 52-bit fraction
  // left by 11 places the double's quiet bit (51) on the x87 quiet bit (62) and the
  // 51-bit payload on bits 61:11, so both NaN flavours map across unchanged.
  Converted.Significand = 0x8000000000000000ULL | (Fraction << 11);

  return Converted;
}

X80SoftFloat(softfloat_state* state, BIGFLOAT rhs) {
#if BIGFLOATSIZE == 16
*this = f128_to_extF80(state, std::bit_cast<float128_t>(rhs));
Expand Down
6 changes: 6 additions & 0 deletions FEXCore/Source/Interface/Config/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,12 @@ void ReloadMetaLayer() {
// Single stepping also enforces single instruction size blocks
Set(FEXCore::Config::ConfigOption::CONFIG_MAXINST, "1");
}

if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_X87STRICTREDUCEDPRECISION) &&
Meta->GetConv<bool>(FEXCore::Config::CONFIG_X87STRICTREDUCEDPRECISION).value_or(false)) {
// Strict reduced precision requires reduced precision to be enabled
Set(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION, "1");
}
}

void AddLayer(fextl::unique_ptr<FEXCore::Config::Layer> _Layer) {
Expand Down
8 changes: 8 additions & 0 deletions FEXCore/Source/Interface/Config/Config.json.in
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,14 @@
"Emulates X87 floating point using 64-bit precision. This reduces emulation accuracy and may result in rendering bugs."
]
},
"X87StrictReducedPrecision": {
"Type": "bool",
"Default": "false",
"Desc": [
"Enables stricter X87 floating point behavior when X87ReducedPrecision is enabled.",
"Adds additional checks and implementations like NaN propagation for better compatibility."
]
},
"StallProcess": {
"Type": "bool",
"Default": "false",
Expand Down
1 change: 1 addition & 0 deletions FEXCore/Source/Interface/Context/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ class ContextImpl final : public FEXCore::Context::Context, public CPU::CodeBuff
FEX_CONFIG_OPT(BlockJITNaming, BLOCKJITNAMING);
FEX_CONFIG_OPT(GDBSymbols, GDBSYMBOLS);
FEX_CONFIG_OPT(x87ReducedPrecision, X87REDUCEDPRECISION);
FEX_CONFIG_OPT(x87StrictReducedPrecision, X87STRICTREDUCEDPRECISION);
FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY);
FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS);
FEX_CONFIG_OPT(SmallTSCScale, SMALLTSCSCALE);
Expand Down
7 changes: 0 additions & 7 deletions FEXCore/Source/Interface/Core/Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,13 @@ desc: Glues Frontend, OpDispatcher and IR Opts & Compilation, LookupCache, Dispa
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <fcntl.h>
#include <functional>
#include <mutex>
#include <queue>
#include <shared_mutex>
#include <signal.h>
#include <stdio.h>
#include <string_view>
#include <sys/stat.h>
#include <type_traits>
#include <unistd.h>
#include <unordered_map>
#include <utility>
#include <xxhash.h>

Expand Down
14 changes: 14 additions & 0 deletions FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
#pragma once
#include "Common/SoftFloat.h"

#include "Interface/Context/Context.h"
#include "Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/SHMStats.h>
#include <FEXCore/Config/Config.h>

namespace FEXCore::CPU {
FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW, bool Force80BitPrecision = false) {
Expand Down Expand Up @@ -77,6 +79,12 @@ struct OpHandlers<IR::OP_F80CVTTO> {
// Fallback for converting a 64-bit double into an 80-bit x87 value.
// The plain softfloat conversion is only used when reduced precision is enabled
// without its strict variant; otherwise the NaN-preserving conversion is used.
FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle8(uint16_t FCW, double src, FEXCore::Core::CpuStateFrame* Frame) {
  FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
  ScopedSoftFloatState State {FCW, Frame};

  auto CTX = static_cast<Context::ContextImpl*>(Frame->Thread->CTX);
  if (CTX->Config.x87ReducedPrecision && !CTX->Config.x87StrictReducedPrecision) {
    return X80SoftFloat(&State.State, src);
  }
  return X80SoftFloat::FromF64_PreserveNaN(&State.State, src);
}
};
Expand Down Expand Up @@ -115,6 +123,12 @@ struct OpHandlers<IR::OP_F80CVT> {
// Fallback for converting an 80-bit x87 value into a 64-bit double.
// The plain softfloat conversion is only used when reduced precision is enabled
// without its strict variant; otherwise the NaN-preserving conversion is used.
FEXCORE_PRESERVE_ALL_ATTR static double handle8(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
  FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
  ScopedSoftFloatState State {FCW, Frame};

  auto CTX = static_cast<Context::ContextImpl*>(Frame->Thread->CTX);
  if (CTX->Config.x87ReducedPrecision && !CTX->Config.x87StrictReducedPrecision) {
    return X80SoftFloat(src).ToF64(&State.State);
  }
  return X80SoftFloat(src).ToF64_PreserveNaN(&State.State);
}
};
Expand Down
1 change: 1 addition & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,7 @@ class OpDispatchBuilder final : public IREmitter {

private:
FEX_CONFIG_OPT(ReducedPrecisionMode, X87REDUCEDPRECISION);
FEX_CONFIG_OPT(StrictReducedPrecisionMode, X87STRICTREDUCEDPRECISION);

struct JumpTargetInfo {
Ref BlockEntry;
Expand Down
54 changes: 52 additions & 2 deletions FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,18 +157,22 @@ class X87StackOptimization final : public Pass {
: Features(Features)
, GPROpSize(GPROpSize) {
FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION);
FEX_CONFIG_OPT(StrictReducedPrecision, X87STRICTREDUCEDPRECISION);
ReducedPrecisionMode = ReducedPrecision;
StrictReducedPrecisionMode = StrictReducedPrecision;
}
void Run(IREmitter* Emit) override;

private:
const FEXCore::HostFeatures& Features;
const OpSize GPROpSize;
bool ReducedPrecisionMode;
bool StrictReducedPrecisionMode;
FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS);

// Helpers
Ref RotateRight8(uint32_t V, Ref Amount);
Ref SilenceNaN(Ref Value, OpSize StoreSize);

void F80SplitStore_Helper(const IROp_StoreStackMem* Op, Ref StackNode, Ref AddrNode, Ref Offset, OpSize Align, MemOffsetType OffsetType,
uint8_t OffsetScale) {
Expand Down Expand Up @@ -215,6 +219,7 @@ class X87StackOptimization final : public Pass {
case OpSize::i32Bit:
case OpSize::i64Bit: {
StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode);
StackNode = SilenceNaN(StackNode, Op->StoreSize);
IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
break;
}
Expand Down Expand Up @@ -244,6 +249,9 @@ class X87StackOptimization final : public Pass {
[[fallthrough]];
}
case OpSize::i64Bit: {
if (StrictReducedPrecisionMode) {
StackNode = SilenceNaN(StackNode, Op->StoreSize);
}
IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
break;
}
Expand Down Expand Up @@ -333,6 +341,8 @@ class X87StackOptimization final : public Pass {
// Cache for Constants
// ConstantPoll[i] has IREmit->_Constant(i);
std::array<Ref, 8> ConstantPool {};
Ref CachedQuietBit32 {};
Ref CachedQuietBit64 {};
Ref GetConstant(ssize_t Offset);

// Cached value for Top
Expand Down Expand Up @@ -367,6 +377,8 @@ inline const X87StackOptimization::StackMemberInfo X87StackOptimization::StackMe
// Resets every cache held by the pass: delegates to InvalidateCachedRegs(), then
// forgets all pooled small constants and both cached quiet-bit constants.
inline void X87StackOptimization::InvalidateCaches() {
  InvalidateCachedRegs();

  for (auto& PooledConstant : ConstantPool) {
    PooledConstant = nullptr;
  }

  CachedQuietBit32 = nullptr;
  CachedQuietBit64 = nullptr;
}

inline void X87StackOptimization::InvalidateCachedRegs() {
Expand All @@ -385,12 +397,23 @@ inline void X87StackOptimization::Reset() {
}

inline Ref X87StackOptimization::GetConstant(ssize_t Offset) {
if (Offset == 0x00400000) {
if (!CachedQuietBit32) {
CachedQuietBit32 = IREmit->_Constant(Offset);
}
return CachedQuietBit32;
}
if (Offset == 0x0008000000000000LL) {
if (!CachedQuietBit64) {
CachedQuietBit64 = IREmit->_Constant(Offset);
}
return CachedQuietBit64;
}

if (Offset < 0 || Offset >= X87StackOptimization::ConstantPool.size()) {
// not dealt by pool
return IREmit->_Constant(Offset);
}
if (ConstantPool[Offset] == nullptr) {

ConstantPool[Offset] = IREmit->_Constant(Offset);
}
return ConstantPool[Offset];
Expand Down Expand Up @@ -493,6 +516,30 @@ inline Ref X87StackOptimization::RotateRight8(uint32_t V, Ref Amount) {
return IREmit->_Lshr(OpSize::i32Bit, GetConstant(V | (V << 8)), Amount);
}

// Emits IR that turns a signaling NaN into a quiet NaN and leaves any other value untouched.
// Value is expected to already be converted to the store format (32-bit float or
// 64-bit double); 80-bit values are never silenced here since such a store is
// likely a plain copy.
inline Ref X87StackOptimization::SilenceNaN(Ref Value, OpSize StoreSize) {
  LOGMAN_THROW_A_FMT(StoreSize == OpSize::i32Bit || StoreSize == OpSize::i64Bit, "Unexpected store size");

  constexpr auto VectorSize = OpSize::i64Bit;

  // Quiet bit: bit 22 for a 32-bit float, bit 51 for a 64-bit double.
  const ssize_t QuietBit = (StoreSize == OpSize::i32Bit) ? 0x00400000LL : 0x0008000000000000LL;
  Ref QuietBitVec = IREmit->_VCastFromGPR(VectorSize, StoreSize, GetConstant(QuietBit));

  // Comparing the value with itself yields all-ones when it is NOT a NaN and zero when it is.
  Ref NotNaNMask = IREmit->_VFCMPEQ(VectorSize, StoreSize, Value, Value);
  Ref Quieted = IREmit->_VOr(VectorSize, StoreSize, Value, QuietBitVec);

  // Keep the original bits where the mask is set, take the quieted bits otherwise.
  return IREmit->_VBSL(VectorSize, NotNaNMask, Value, Quieted);
}

inline std::optional<X87StackOptimization::StackMemberInfo> X87StackOptimization::MigrateToSlowPath_IfInvalid(uint8_t Offset) {
const auto& [Valid, StackMember] = StackData.top(Offset);
MigrateToSlowPathIf(Valid != StackSlot::VALID);
Expand Down Expand Up @@ -1011,6 +1058,9 @@ void X87StackOptimization::Run(IREmitter* Emit) {
if (Op->StoreSize == OpSize::f80Bit) {
Store80BitToMem(Op, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale);
} else {
if (!ReducedPrecisionMode || StrictReducedPrecisionMode) {
SourceValue = SilenceNaN(SourceValue, Op->StoreSize);
}
IREmit->_StoreMemFPR(StoreSize, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale);
}
break;
Expand Down
7 changes: 7 additions & 0 deletions Source/Tools/FEXConfig/main.qml
Original file line number Diff line number Diff line change
Expand Up @@ -706,10 +706,17 @@ ApplicationWindow {
}

// Checkbox for the "X87ReducedPrecision" config option.
ConfigCheckBox {
// The id lets other items in this file refer to this checkbox's state.
id: x87ReducedPrecisionCheckbox
text: qsTr("Reduced x87 precision")
config: "X87ReducedPrecision"
}

// Checkbox for the "X87StrictReducedPrecision" config option.
ConfigCheckBox {
text: qsTr("Strict reduced x87 precision")
config: "X87StrictReducedPrecision"
// Enabled only while the x87ReducedPrecisionCheckbox item is checked.
enabled: x87ReducedPrecisionCheckbox.checked
}

ConfigCheckBox {
text: qsTr("Disable JIT optimization passes")
config: "O0"
Expand Down
2 changes: 1 addition & 1 deletion unittests/32Bit_ASM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ foreach(ASM_SRC ${ASM_SOURCES})

add_custom_command(OUTPUT ${OUTPUT_NAME}
DEPENDS "${TMP_FILE}"
COMMAND "nasm" ARGS "${TMP_FILE}" "-o" "${OUTPUT_NAME}")
COMMAND "nasm" ARGS "-i" "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Includes/" "${TMP_FILE}" "-o" "${OUTPUT_NAME}")

add_custom_command(OUTPUT ${OUTPUT_CONFIG_NAME}
DEPENDS "${ASM_SRC}"
Expand Down
Loading
Loading