Commit 2a95651

[CIR] [Lowering] [X86_64] Support VAArg for LongDouble (#1150)
Recommit of #1101. I am not sure what happened, but that merged PR does not show up in the git log; perhaps the stacked PR did not land successfully. Either way, we need to land it again. The original commit message follows: --- This is a follow-up to #1100. After #1100, using LongDouble with VAArg still runs into trouble because of details of the X86_64 ABI, and this patch addresses that. The practical impact is that, with this patch, #1088, and a small follow-up fix, we can build and run all of the C benchmarks in SPEC CPU 2017. I think that is a milestone.
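For reference, a minimal usage sketch of the pattern this change enables (it mirrors the new f2 test added to clang/test/CIR/Lowering/var-arg-x86_64.c in this commit; the function name is illustrative only):

#include <stdarg.h>

/* Sums n variadic long double arguments. Before this patch, va_arg on
 * long double could not be lowered through the CIR x86_64 path; with it,
 * the value is read from the 16-byte-aligned overflow area of va_list. */
long double sum_ld(int n, ...) {
  va_list ap;
  va_start(ap, n);
  long double total = 0.0L;
  for (int i = 0; i < n; ++i)
    total += va_arg(ap, long double);
  va_end(ap);
  return total;
}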
1 parent 03bb2e0 commit 2a95651

File tree

7 files changed (+186, -8 lines)

clang/lib/CIR/Dialect/IR/CIRTypes.cpp

+2 -1

@@ -745,7 +745,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
 llvm::TypeSize
 FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                             mlir::DataLayoutEntryListRef params) const {
-  return llvm::TypeSize::getFixed(16);
+  return llvm::TypeSize::getFixed(128);
 }
 
 uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
@@ -766,6 +766,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
 llvm::TypeSize
 FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                              mlir::DataLayoutEntryListRef params) const {
+  // FIXME: We probably want it to return 128. But we're lacking a test now.
   return llvm::TypeSize::getFixed(16);
 }
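An aside on the 128-bit figure above (editor's note, not part of the diff): x86_fp80 carries only 80 significant bits, but the x86-64 SysV ABI stores long double in 16 bytes with 16-byte alignment, so its in-memory type size is 128 bits. A quick C check on an x86-64 Linux target:

int main(void) {
  /* Storage size and alignment of the 80-bit x87 long double on x86-64 SysV. */
  _Static_assert(sizeof(long double) == 16, "long double occupies 16 bytes");
  _Static_assert(_Alignof(long double) == 16, "long double is 16-byte aligned");
  return 0;
}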

clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp

+7

@@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
   return CXXABI.getRecordArgABI(RT);
 }
 
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
+  auto st = mlir::dyn_cast<StructType>(ty);
+  if (!st)
+    return CIRCXXABI::RAA_Default;
+  return getRecordArgABI(st, CXXABI);
+}
+
 } // namespace cir

clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h

+1

@@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);
 
 CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI);
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);
 
 } // namespace cir

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp

+1 -4

@@ -46,7 +46,6 @@ std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
   mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
   if (!mo)
     return nullptr;
-
   mlir::PatternRewriter rewriter(mo.getContext());
   return cir::createLowerModule(mo, rewriter);
 }
@@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   // Let's hope LLVM's va_arg instruction can take care of it.
   // Remove this when X86_64ABIInfo::classify can take care of every type.
   if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
-                 StructType>(op.getType()))
+                 StructType, LongDoubleType>(op.getType()))
     return nullptr;
 
   // Assume that va_list type is correct; should be pointer to LLVM type:
@@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
   if (!lowerModule)
     return nullptr;
-
   mlir::Type ty = op.getType();
 
   // FIXME: How should we access the X86AVXABILevel?
@@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   mlir::Block *contBlock = currentBlock->splitBlock(op);
   mlir::Block *inRegBlock = builder.createBlock(contBlock);
   mlir::Block *inMemBlock = builder.createBlock(contBlock);
-
   builder.setInsertionPointToEnd(currentBlock);
   builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp

+115 -3

@@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
     Current = Class::SSE;
     return;
 
+  } else if (mlir::isa<LongDoubleType>(Ty)) {
+    const llvm::fltSemantics *LDF =
+        &getContext().getTargetInfo().getLongDoubleFormat();
+    if (LDF == &llvm::APFloat::IEEEquad()) {
+      Lo = Class::SSE;
+      Hi = Class::SSEUp;
+    } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
+      Lo = Class::X87;
+      Hi = Class::X87Up;
+    } else if (LDF == &llvm::APFloat::IEEEdouble()) {
+      Current = Class::SSE;
+    } else {
+      llvm_unreachable("unexpected long double representation!");
+    }
+    return;
   } else if (mlir::isa<BoolType>(Ty)) {
     Current = Class::Integer;
   } else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
@@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
   cir_cconv_unreachable("NYI");
 }
 
+ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
+                                            unsigned freeIntRegs) const {
+  // If this is a scalar LLVM value then assume LLVM will pass it in the right
+  // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
+  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
+      /*!Ty->isBitIntType()*/) {
+    // FIXME: Handling enum type?
+
+    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
+  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
+    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
+
+  // Compute the byval alignment. We specify the alignment of the byval in all
+  // cases so that the mid-level optimizer knows the alignment of the byval.
+  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of the
+  // free integer registers. Doing this when there are free integer registers
+  // would require more care, as we would have to ensure that the coerced value
+  // did not claim the unused register. That would require either reording the
+  // arguments to the function (so that any subsequent inreg values came first),
+  // or only doing this optimization when there were no following arguments that
+  // might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being passed
+  // by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t size = getContext().getTypeSize(ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (align == 8 && size <= 64)
+      return ABIArgInfo::getDirect(
+          cir::IntType::get(LT.getMLIRContext(), size, false));
+  }
+
+  return ABIArgInfo::getIndirect(align);
+}
+
 /// Return a type that will be passed by the backend in the low 8 bytes of an
 /// XMM register, corresponding to the SSE class.
 mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
@@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || mlir::isa<mlir::Float64Type>(T0))
-    return T0; // NOTE(cir): Not sure if this is correct.
+    return cir::DoubleType::get(LT.getMLIRContext());
 
   mlir::Type T1 = {};
   unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -296,6 +370,8 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
     return T0;
   }
 
+  return cir::DoubleType::get(LT.getMLIRContext());
+
   cir_cconv_unreachable("NYI");
 }
 
@@ -539,13 +615,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
     ++neededSSE;
     break;
   }
+  // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+  // on the stack.
+  case Class::Memory:
+
+  // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+  // COMPLEX_X87, it is passed in memory.
+  case Class::X87:
+  case Class::ComplexX87:
+    if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
+      ++neededInt;
+    return getIndirectResult(Ty, freeIntRegs);
+
+  case Class::SSEUp:
+  case Class::X87Up:
+    llvm_unreachable("Invalid classification for lo word.");
+
   default:
     cir_cconv_assert_or_abort(!cir::MissingFeatures::X86ArgTypeClassification(),
                               "NYI");
   }
 
   mlir::Type HighPart = {};
   switch (Hi) {
+  case Class::Memory:
+  case Class::X87:
+  case Class::ComplexX87:
+    llvm_unreachable("Invalid classification for hi word.");
+
   case Class::NoClass:
     break;
 
@@ -558,8 +655,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
       return ABIArgInfo::getDirect(HighPart, 8);
     break;
 
-  default:
-    cir_cconv_unreachable("NYI");
+  // X87Up generally doesn't occur here (long double is passed in
+  // memory), except in situations involving unions.
+  case Class::X87Up:
+  case Class::SSE:
+    ++neededSSE;
+    HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
+
+    if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
+      return ABIArgInfo::getDirect(HighPart, 8);
+    break;
+
+  // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+  // eightbyte is passed in the upper half of the last used SSE
+  // register. This only happens when 128-bit vectors are passed.
+  case Class::SSEUp:
+    llvm_unreachable("NYI && We need to implement GetByteVectorType");
+    break;
   }
 
   // If a high part was specified, merge it together with the low part. It is
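Background for the classification above (an illustration added here, not part of the diff): on x86-64 Linux the long double format is x87DoubleExtended, so classify() yields Lo = X87 and Hi = X87Up, and AMD64-ABI Rule 5 sends the argument to memory via getIndirectResult. The effect is visible in ordinary C, where an x87 long double argument is always passed on the stack while a plain double arrives in an SSE register:

/* Illustration only: compile for x86-64 SysV and compare calling conventions.
 * bump_ld receives its argument in memory (X87/X87Up -> MEMORY);
 * bump_d receives its argument in %xmm0 (SSE class). */
long double bump_ld(long double x) { return x + 1.0L; }
double bump_d(double x) { return x + 1.0; }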

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h

+8

@@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
                                 mlir::Type SourceTy,
                                 unsigned SourceOffset) const;
 
+  /// getIndirectResult - Give a source type \arg Ty, return a suitable result
+  /// such that the argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
+                                      unsigned freeIntRegs) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers. In particular, platforms which

clang/test/CIR/Lowering/var-arg-x86_64.c

+52

@@ -76,3 +76,55 @@ double f1(int n, ...) {
 // CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
 // CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
 // CIR: store [[CASTED_ARG]], [[RES]]
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
+// Ptr Mask Operations
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
+// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
+// CHECK: [[DIFF:%.+]] = sub i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], [[MASKED]]
+// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
+
+// CIR: cir.func @f2
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!ty___va_list_tag x 1>, !cir.ptr<!cir.array<!ty___va_list_tag x 1>>, ["valist"]
+// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR-DAG: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
+// CIR-DAG: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
+// CIR-DAG: [[CONSTANT:%.+]] = cir.const #cir.int<15>
+// CIR-DAG: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
+// CIR-DAG: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
+// CIR-DAG: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
+// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
+// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
+// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
+// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
+// CIR. cir.via.end

0 commit comments