Skip to content

Commit f8063ff

Browse files
authored
[VP][RISCV] Add vp.reduce.fmaximum/fminimum and its RISC-V codegen (llvm#91782)
`vp.reduce.fmaximum/fminimum` are the VP version of `vector.reduce.fmaximum/fminimum`.
1 parent 4198aeb commit f8063ff

File tree

13 files changed

+453
-29
lines changed

13 files changed

+453
-29
lines changed

llvm/docs/LangRef.rst

+140
Original file line numberDiff line numberDiff line change
@@ -22182,6 +22182,146 @@ Examples:
2218222182
%also.r = call float @llvm.minnum.f32(float %reduction, float %start)
2218322183

2218422184

22185+
.. _int_vp_reduce_fmaximum:
22186+
22187+
'``llvm.vp.reduce.fmaximum.*``' Intrinsics
22188+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22189+
22190+
Syntax:
22191+
"""""""
22192+
This is an overloaded intrinsic.
22193+
22194+
::
22195+
22196+
declare float @llvm.vp.reduce.fmaximum.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
22197+
declare double @llvm.vp.reduce.fmaximum.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
22198+
22199+
Overview:
22200+
"""""""""
22201+
22202+
Predicated floating-point ``MAX`` reduction of a vector and a scalar starting
22203+
value, returning the result as a scalar.
22204+
22205+
22206+
Arguments:
22207+
""""""""""
22208+
22209+
The first operand is the start value of the reduction, which must be a scalar
22210+
floating-point type equal to the result type. The second operand is the vector
22211+
on which the reduction is performed and must be a vector of floating-point
22212+
values whose element type is the result/start type. The third operand is the
22213+
vector mask and is a vector of boolean values with the same number of elements
22214+
as the vector operand. The fourth operand is the explicit vector length of the
22215+
operation.
22216+
22217+
Semantics:
22218+
""""""""""
22219+
22220+
The '``llvm.vp.reduce.fmaximum``' intrinsic performs the floating-point ``MAX``
22221+
reduction (:ref:`llvm.vector.reduce.fmaximum <int_vector_reduce_fmaximum>`) of
22222+
the vector operand ``val`` on each enabled lane, taking the maximum of that and
22223+
the scalar ``start_value``. Disabled lanes are treated as containing the
22224+
neutral value (i.e. having no effect on the reduction operation). If the vector
22225+
length is zero, the result is the start value.
22226+
22227+
The neutral value is dependent on the :ref:`fast-math flags <fastmath>`. If no
22228+
flags are set or only the ``nnan`` flag is set, the neutral value is ``-Infinity``.
22229+
If ``ninf`` is set, then the neutral value is the smallest (most negative) finite floating-point value
22230+
for the result type.
22231+
22232+
This instruction has the same comparison semantics as the
22233+
:ref:`llvm.vector.reduce.fmaximum <int_vector_reduce_fmaximum>` intrinsic (and
22234+
thus the '``llvm.maximum.*``' intrinsic). That is, the result will always be a
22235+
number unless any of the elements in the vector or the starting value is
22236+
``NaN``. Namely, this intrinsic propagates ``NaN``. Also, -0.0 is considered
22237+
less than +0.0.
22238+
22239+
To ignore the start value, the neutral value can be used.
22240+
22241+
Examples:
22242+
"""""""""
22243+
22244+
.. code-block:: llvm
22245+
22246+
%r = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
22247+
; %r is equivalent to %also.r, where lanes greater than or equal to %evl
22248+
; are treated as though %mask were false for those lanes.
22249+
22250+
%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float -infinity, float -infinity, float -infinity, float -infinity>
22251+
%reduction = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %masked.a)
22252+
%also.r = call float @llvm.maximum.f32(float %reduction, float %start)
22253+
22254+
22255+
.. _int_vp_reduce_fminimum:
22256+
22257+
'``llvm.vp.reduce.fminimum.*``' Intrinsics
22258+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22259+
22260+
Syntax:
22261+
"""""""
22262+
This is an overloaded intrinsic.
22263+
22264+
::
22265+
22266+
declare float @llvm.vp.reduce.fminimum.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
22267+
declare double @llvm.vp.reduce.fminimum.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
22268+
22269+
Overview:
22270+
"""""""""
22271+
22272+
Predicated floating-point ``MIN`` reduction of a vector and a scalar starting
22273+
value, returning the result as a scalar.
22274+
22275+
22276+
Arguments:
22277+
""""""""""
22278+
22279+
The first operand is the start value of the reduction, which must be a scalar
22280+
floating-point type equal to the result type. The second operand is the vector
22281+
on which the reduction is performed and must be a vector of floating-point
22282+
values whose element type is the result/start type. The third operand is the
22283+
vector mask and is a vector of boolean values with the same number of elements
22284+
as the vector operand. The fourth operand is the explicit vector length of the
22285+
operation.
22286+
22287+
Semantics:
22288+
""""""""""
22289+
22290+
The '``llvm.vp.reduce.fminimum``' intrinsic performs the floating-point ``MIN``
22291+
reduction (:ref:`llvm.vector.reduce.fminimum <int_vector_reduce_fminimum>`) of
22292+
the vector operand ``val`` on each enabled lane, taking the minimum of that and
22293+
the scalar ``start_value``. Disabled lanes are treated as containing the neutral
22294+
value (i.e. having no effect on the reduction operation). If the vector length
22295+
is zero, the result is the start value.
22296+
22297+
The neutral value is dependent on the :ref:`fast-math flags <fastmath>`. If no
22298+
flags are set or only the ``nnan`` flag is set, the neutral value is ``+Infinity``.
22299+
If ``ninf`` is set, then the neutral value is the largest finite floating-point value
22300+
for the result type.
22301+
22302+
This instruction has the same comparison semantics as the
22303+
:ref:`llvm.vector.reduce.fminimum <int_vector_reduce_fminimum>` intrinsic (and
22304+
thus the '``llvm.minimum.*``' intrinsic). That is, the result will always be a
22305+
number unless any of the elements in the vector or the starting value is
22306+
``NaN``. Namely, this intrinsic propagates ``NaN``. Also, -0.0 is considered
22307+
less than +0.0.
22308+
22309+
To ignore the start value, the neutral value can be used.
22310+
22311+
Examples:
22312+
"""""""""
22313+
22314+
.. code-block:: llvm
22315+
22316+
%r = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
22317+
; %r is equivalent to %also.r, where lanes greater than or equal to %evl
22318+
; are treated as though %mask were false for those lanes.
22319+
22320+
%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float infinity, float infinity, float infinity, float infinity>
22321+
%reduction = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %masked.a)
22322+
%also.r = call float @llvm.minimum.f32(float %reduction, float %start)
22323+
22324+
2218522325
.. _int_get_active_lane_mask:
2218622326

2218722327
'``llvm.get.active.lane.mask.*``' Intrinsics

llvm/include/llvm/IR/Intrinsics.td

+10
Original file line numberDiff line numberDiff line change
@@ -2243,6 +2243,16 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
22432243
llvm_anyvector_ty,
22442244
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
22452245
llvm_i32_ty]>;
2246+
def int_vp_reduce_fmaximum : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
2247+
[ LLVMVectorElementType<0>,
2248+
llvm_anyvector_ty,
2249+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
2250+
llvm_i32_ty]>;
2251+
def int_vp_reduce_fminimum : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
2252+
[ LLVMVectorElementType<0>,
2253+
llvm_anyvector_ty,
2254+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
2255+
llvm_i32_ty]>;
22462256
}
22472257

22482258
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>] in {

llvm/include/llvm/IR/VPIntrinsics.def

+8
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,14 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX,
701701
HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
702702
vector_reduce_fmin)
703703

704+
// llvm.vp.reduce.fmaximum(start,x,mask,vlen)
705+
HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmaximum, VP_REDUCE_FMAXIMUM,
706+
vector_reduce_fmaximum)
707+
708+
// llvm.vp.reduce.fminimum(start,x,mask,vlen)
709+
HELPER_REGISTER_REDUCTION_VP(vp_reduce_fminimum, VP_REDUCE_FMINIMUM,
710+
vector_reduce_fminimum)
711+
704712
#undef HELPER_REGISTER_REDUCTION_VP
705713

706714
// Specialized helper macro for VP reductions as above but with two forms:

llvm/lib/CodeGen/ExpandVectorPredication.cpp

+21-3
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,8 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
367367
Type *EltTy) {
368368
bool Negative = false;
369369
unsigned EltBits = EltTy->getScalarSizeInBits();
370-
switch (VPI.getIntrinsicID()) {
370+
Intrinsic::ID VID = VPI.getIntrinsicID();
371+
switch (VID) {
371372
default:
372373
llvm_unreachable("Expecting a VP reduction intrinsic");
373374
case Intrinsic::vp_reduce_add:
@@ -387,12 +388,17 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
387388
return ConstantInt::get(EltTy->getContext(),
388389
APInt::getSignedMinValue(EltBits));
389390
case Intrinsic::vp_reduce_fmax:
391+
case Intrinsic::vp_reduce_fmaximum:
390392
Negative = true;
391393
[[fallthrough]];
392-
case Intrinsic::vp_reduce_fmin: {
394+
case Intrinsic::vp_reduce_fmin:
395+
case Intrinsic::vp_reduce_fminimum: {
396+
bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
397+
VID == Intrinsic::vp_reduce_fmaximum;
393398
FastMathFlags Flags = VPI.getFastMathFlags();
394399
const fltSemantics &Semantics = EltTy->getFltSemantics();
395-
return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
400+
return (!Flags.noNaNs() && !PropagatesNaN)
401+
? ConstantFP::getQNaN(EltTy, Negative)
396402
: !Flags.noInfs()
397403
? ConstantFP::getInfinity(EltTy, Negative)
398404
: ConstantFP::get(EltTy,
@@ -480,6 +486,18 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
480486
Reduction =
481487
Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
482488
break;
489+
case Intrinsic::vp_reduce_fmaximum:
490+
Reduction = Builder.CreateFPMaximumReduce(RedOp);
491+
transferDecorations(*Reduction, VPI);
492+
Reduction =
493+
Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
494+
break;
495+
case Intrinsic::vp_reduce_fminimum:
496+
Reduction = Builder.CreateFPMinimumReduce(RedOp);
497+
transferDecorations(*Reduction, VPI);
498+
Reduction =
499+
Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
500+
break;
483501
case Intrinsic::vp_reduce_fadd:
484502
Reduction = Builder.CreateFAddReduce(Start, RedOp);
485503
break;

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
12221222
case ISD::VP_REDUCE_UMIN:
12231223
case ISD::VP_REDUCE_FMAX:
12241224
case ISD::VP_REDUCE_FMIN:
1225+
case ISD::VP_REDUCE_FMAXIMUM:
1226+
case ISD::VP_REDUCE_FMINIMUM:
12251227
case ISD::VP_REDUCE_SEQ_FADD:
12261228
case ISD::VP_REDUCE_SEQ_FMUL:
12271229
Action = TLI.getOperationAction(
@@ -5015,6 +5017,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
50155017
Node->getOpcode() == ISD::VP_REDUCE_FMUL ||
50165018
Node->getOpcode() == ISD::VP_REDUCE_FMAX ||
50175019
Node->getOpcode() == ISD::VP_REDUCE_FMIN ||
5020+
Node->getOpcode() == ISD::VP_REDUCE_FMAXIMUM ||
5021+
Node->getOpcode() == ISD::VP_REDUCE_FMINIMUM ||
50185022
Node->getOpcode() == ISD::VP_REDUCE_SEQ_FADD)
50195023
OVT = Node->getOperand(1).getSimpleValueType();
50205024
if (Node->getOpcode() == ISD::BR_CC ||
@@ -5687,6 +5691,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
56875691
case ISD::VP_REDUCE_FMUL:
56885692
case ISD::VP_REDUCE_FMAX:
56895693
case ISD::VP_REDUCE_FMIN:
5694+
case ISD::VP_REDUCE_FMAXIMUM:
5695+
case ISD::VP_REDUCE_FMINIMUM:
56905696
case ISD::VP_REDUCE_SEQ_FADD:
56915697
Results.push_back(PromoteReduction(Node));
56925698
break;

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -3148,6 +3148,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
31483148
case ISD::VP_REDUCE_UMIN:
31493149
case ISD::VP_REDUCE_FMAX:
31503150
case ISD::VP_REDUCE_FMIN:
3151+
case ISD::VP_REDUCE_FMAXIMUM:
3152+
case ISD::VP_REDUCE_FMINIMUM:
31513153
Res = SplitVecOp_VP_REDUCE(N, OpNo);
31523154
break;
31533155
case ISD::VP_CTTZ_ELTS:
@@ -6251,6 +6253,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
62516253
case ISD::VP_REDUCE_UMIN:
62526254
case ISD::VP_REDUCE_FMAX:
62536255
case ISD::VP_REDUCE_FMIN:
6256+
case ISD::VP_REDUCE_FMAXIMUM:
6257+
case ISD::VP_REDUCE_FMINIMUM:
62546258
Res = WidenVecOp_VP_REDUCE(N);
62556259
break;
62566260
case ISD::VP_CTTZ_ELTS:

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
470470
case ISD::VP_REDUCE_FMIN:
471471
return ISD::FMINNUM;
472472
case ISD::VECREDUCE_FMAXIMUM:
473+
case ISD::VP_REDUCE_FMAXIMUM:
473474
return ISD::FMAXIMUM;
474475
case ISD::VECREDUCE_FMINIMUM:
476+
case ISD::VP_REDUCE_FMINIMUM:
475477
return ISD::FMINIMUM;
476478
}
477479
}

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+35-5
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
713713
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714714
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715715
ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716-
ISD::EXPERIMENTAL_VP_SPLICE};
716+
ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
717+
ISD::VP_REDUCE_FMAXIMUM};
717718

718719
static const unsigned IntegerVecReduceOps[] = {
719720
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -958,7 +959,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
958959
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
959960
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
960961
ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
961-
ISD::VP_FMAXIMUM};
962+
ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
962963

963964
// Sets common operation actions on RVV floating-point vector types.
964965
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -6661,6 +6662,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
66616662
case ISD::VP_REDUCE_SEQ_FADD:
66626663
case ISD::VP_REDUCE_FMIN:
66636664
case ISD::VP_REDUCE_FMAX:
6665+
case ISD::VP_REDUCE_FMINIMUM:
6666+
case ISD::VP_REDUCE_FMAXIMUM:
66646667
if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
66656668
(Subtarget.hasVInstructionsF16Minimal() &&
66666669
!Subtarget.hasVInstructionsF16()))
@@ -9526,8 +9529,10 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
95269529
case ISD::VP_REDUCE_SEQ_FADD:
95279530
return RISCVISD::VECREDUCE_SEQ_FADD_VL;
95289531
case ISD::VP_REDUCE_FMAX:
9532+
case ISD::VP_REDUCE_FMAXIMUM:
95299533
return RISCVISD::VECREDUCE_FMAX_VL;
95309534
case ISD::VP_REDUCE_FMIN:
9535+
case ISD::VP_REDUCE_FMINIMUM:
95319536
return RISCVISD::VECREDUCE_FMIN_VL;
95329537
}
95339538

@@ -9786,16 +9791,19 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
97869791
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
97879792
SelectionDAG &DAG) const {
97889793
SDLoc DL(Op);
9794+
unsigned Opc = Op.getOpcode();
9795+
SDValue Start = Op.getOperand(0);
97899796
SDValue Vec = Op.getOperand(1);
97909797
EVT VecEVT = Vec.getValueType();
9798+
MVT XLenVT = Subtarget.getXLenVT();
97919799

97929800
// TODO: The type may need to be widened rather than split. Or widened before
97939801
// it can be split.
97949802
if (!isTypeLegal(VecEVT))
97959803
return SDValue();
97969804

97979805
MVT VecVT = VecEVT.getSimpleVT();
9798-
unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9806+
unsigned RVVOpcode = getRVVReductionOp(Opc);
97999807

98009808
if (VecVT.isFixedLengthVector()) {
98019809
auto ContainerVT = getContainerForFixedLengthVector(VecVT);
@@ -9804,8 +9812,30 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
98049812

98059813
SDValue VL = Op.getOperand(3);
98069814
SDValue Mask = Op.getOperand(2);
9807-
return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9808-
Vec, Mask, VL, DL, DAG, Subtarget);
9815+
SDValue Res =
9816+
lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9817+
Vec, Mask, VL, DL, DAG, Subtarget);
9818+
if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9819+
Op->getFlags().hasNoNaNs())
9820+
return Res;
9821+
9822+
// Propagate NaNs.
9823+
MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9824+
// Check if any of the elements in Vec is NaN.
9825+
SDValue IsNaN = DAG.getNode(
9826+
RISCVISD::SETCC_VL, DL, PredVT,
9827+
{Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9828+
SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9829+
// Check if the start value is NaN.
9830+
SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9831+
VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9832+
SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9833+
DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9834+
MVT ResVT = Res.getSimpleValueType();
9835+
return DAG.getSelect(
9836+
DL, ResVT, NoNaNs, Res,
9837+
DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9838+
ResVT));
98099839
}
98109840

98119841
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,

0 commit comments

Comments
 (0)