Skip to content

Commit

Permalink
JIT: Fix small->float casts with AVX-512 (#112892)
Browse files Browse the repository at this point in the history
  • Loading branch information
saucecontrol authored Feb 25, 2025
1 parent c7a2924 commit 8017079
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 52 deletions.
11 changes: 1 addition & 10 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7287,7 +7287,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
#endif

var_types dstType = treeNode->CastToType();
var_types srcType = op1->TypeGet();
var_types srcType = genActualType(op1->TypeGet());
assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

// Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
Expand All @@ -7302,15 +7302,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
srcType = TYP_I_IMPL;
}

// At this point, we should not see a srcType that is not int or long.
// For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
// either the front-end or lowering phase to have generated two levels of cast.
// The first one is for widening smaller int type to int32 and the second one is
// to the float/double.
// On 32-bit, we expect morph to replace long to float/double casts with helper calls,
// so we should only see int here.
noway_assert(varTypeIsIntOrI(srcType));

// To convert integral type to floating, the cvt[u]si2ss/sd instruction is used
// which does a partial write to lower 4/8 bytes of xmm register keeping the other
// upper bytes unmodified. If "cvt[u]si2ss/sd xmmReg, r32/r64" occurs inside a loop,
Expand Down
50 changes: 8 additions & 42 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,6 @@ GenTree* Lowering::LowerCast(GenTree* tree)
var_types castToType = tree->CastToType();
var_types dstType = castToType;
var_types srcType = castOp->TypeGet();
var_types tmpType = TYP_UNDEF;

// force the srcType to unsigned if GT_UNSIGNED flag is set
if (tree->IsUnsigned())
Expand All @@ -837,11 +836,14 @@ GenTree* Lowering::LowerCast(GenTree* tree)
// Reason: must be converted to a helper call
// srcType = float/double, castToType = ulong
// Reason: must be converted to a helper call
// srcType = float/double, castToType = byte/sbyte/ushort/short
// Reason: must have intermediate cast to int
// srcType = uint castToType = float/double
// Reason: uint -> float/double = uint -> long -> float/double
if (varTypeIsFloating(srcType))
{
noway_assert(!tree->gtOverflow());
assert(!varTypeIsSmall(dstType));
assert(castToType != TYP_ULONG || comp->canUseEvexEncodingDebugOnly());
}
else if (srcType == TYP_UINT)
Expand Down Expand Up @@ -1164,30 +1166,6 @@ GenTree* Lowering::LowerCast(GenTree* tree)
}
#endif // TARGET_AMD64

// Case of src is a small type and dst is a floating point type.
if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType))
{
// These conversions can never be overflow detecting ones.
noway_assert(!tree->gtOverflow());
tmpType = TYP_INT;
}
// case of src is a floating point type and dst is a small type.
else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType))
{
tmpType = TYP_INT;
}

if (tmpType != TYP_UNDEF)
{
GenTree* tmp = comp->gtNewCastNode(tmpType, castOp, tree->IsUnsigned(), tmpType);
tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));

tree->gtFlags &= ~GTF_UNSIGNED;
tree->AsOp()->gtOp1 = tmp;
BlockRange().InsertAfter(castOp, tmp);
ContainCheckCast(tmp->AsCast());
}

// Now determine if we have operands that should be contained.
ContainCheckCast(tree->AsCast());
return nullptr;
Expand Down Expand Up @@ -8567,26 +8545,14 @@ void Lowering::ContainCheckCast(GenTreeCast* node)

if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType))
{
#ifdef DEBUG
// If converting to float/double, the operand must be 4 or 8 byte in size.
if (varTypeIsFloating(castToType))
if (castOp->IsCnsNonZeroFltOrDbl())
{
unsigned opSize = genTypeSize(srcType);
assert(opSize == 4 || opSize == 8);
MakeSrcContained(node, castOp);
}
#endif // DEBUG

// U8 -> R8 conversion requires that the operand be in a register.
if (srcType != TYP_ULONG)
else
{
if (castOp->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, castOp);
}
else
{
srcIsContainable = true;
}
// The ulong->floating SSE2 fallback requires the source to be in register
srcIsContainable = !varTypeIsSmall(srcType) && ((srcType != TYP_ULONG) || comp->canUseEvexEncoding());
}
}
else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType))
Expand Down
23 changes: 23 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_112871/Runtime_112871.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Generated by Fuzzlyn v2.4 on 2025-02-23 17:30:17
// Run on X64 Windows
// Seed: 18098778274409643257-vectort,vector128,vector256,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
// Reduced from 23.4 KiB to 0.4 KiB in 00:01:16

using System;
using Xunit;

public class Runtime_112871
{
public static short s_1;

[Fact]
public static void Problem()
{
ushort vr0 = default(ushort);
s_1 = -1;
Assert.True((double)0 <= (uint)s_1);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>False</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 8017079

Please sign in to comment.