
Commit 4ceef42

[CIR] [Lowering] [X86_64] Support VAArg for LongDouble (#1101)
This is a follow-up to #1100. Even after #1100, using long double with va_arg still failed because of details of the X86_64 ABI; this patch addresses that. The practical impact: together with #1088 and a small follow-up fix, we can build and run all of the C benchmarks in SPEC CPU 2017. I think it is a milestone.
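Concretely, this unblocks variadic code of the following shape (the same pattern exercised by the new f2 test in clang/test/CIR/Lowering/var-arg-x86_64.c below; this snippet is an illustration, not part of the commit):

#include <cstdarg>

// On x86_64, reading a long double from a va_list requires the 16-byte
// aligned overflow-area path that this commit implements.
long double sum(int n, ...) {
  va_list ap;
  va_start(ap, n);
  long double total = 0.0L;
  for (int i = 0; i < n; ++i)
    total += va_arg(ap, long double);
  va_end(ap);
  return total;
}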
1 parent ef39145 commit 4ceef42

File tree

8 files changed (+197, -8 lines)


clang/lib/CIR/Dialect/IR/CIRTypes.cpp (+2, -1)

@@ -747,7 +747,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
 llvm::TypeSize
 FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                             mlir::DataLayoutEntryListRef params) const {
-  return llvm::TypeSize::getFixed(16);
+  return llvm::TypeSize::getFixed(128);
 }

 uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
@@ -768,6 +768,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
 llvm::TypeSize
 FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                              mlir::DataLayoutEntryListRef params) const {
+  // FIXME: We probably want it to return 128. But we're lacking a test now.
   return llvm::TypeSize::getFixed(16);
 }
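For context: getTypeSizeInBits must report the in-memory storage size, and on x86-64 the x87 80-bit format is padded out to 16 bytes with 16-byte alignment, hence getFixed(128). A quick host-side check (a minimal sketch, assuming an x86_64 Linux host):

#include <cstdio>

int main() {
  // On x86_64-linux-gnu this prints "sizeof=16 alignof=16": only 80 bits
  // carry the value, but the type occupies 128 bits of storage.
  std::printf("sizeof=%zu alignof=%zu\n", sizeof(long double),
              alignof(long double));
  return 0;
}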

clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp (+7)

@@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
   return CXXABI.getRecordArgABI(RT);
 }

+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
+  auto sTy = mlir::dyn_cast<StructType>(ty);
+  if (!sTy)
+    return CIRCXXABI::RAA_Default;
+  return getRecordArgABI(sTy, CXXABI);
+}
+
 } // namespace cir
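The new overload lets call sites classify any mlir::Type without first checking for StructType; non-record types fall back to RAA_Default. This is how getIndirectResult in X86.cpp below consumes it (quoted from this commit; note the implicit assumption, as in Clang's CGCXXABI, that RAA_Default is the zero enumerator):

// Non-record types yield RAA_Default (0), so the indirect path below is
// only taken for records with a non-trivial pass-by-value ABI.
if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
  return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);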

clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h (+1)

@@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);

 CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI);
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);

 } // namespace cir

clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp (+12)

@@ -94,6 +94,18 @@ clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const mlir::Type T) const {
       Align = Target->getDoubleAlign();
       break;
     }
+    if (auto longDoubleTy = mlir::dyn_cast<LongDoubleType>(T)) {
+      if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
+          (Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
+           Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
+        Width = AuxTarget->getLongDoubleWidth();
+        Align = AuxTarget->getLongDoubleAlign();
+      } else {
+        Width = Target->getLongDoubleWidth();
+        Align = Target->getLongDoubleAlign();
+      }
+      break;
+    }
     cir_cconv_unreachable("Unknown builtin type!");
     break;
   }
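The OpenMP branch mirrors upstream Clang's ASTContext::getTypeInfoImpl: when compiling the device side of an offload region and the device's long double layout differs from the host's, the host (aux target) layout wins so data shared across the boundary lines up. A hedged illustration (the triples and widths are typical values, e.g. NVPTX natively uses a 64-bit long double; none of this is taken from the patch itself):

// Hypothetical build: clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda
// Host x86_64: long double is 80-bit x87 (width 128, align 128).
// Device nvptx64: long double would natively be 64 bits wide.
#pragma omp declare target
long double shared_value; // device compile reports width/align 128 here
#pragma omp end declare target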

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp (+1, -4)

@@ -46,7 +46,6 @@ std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
   mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
   if (!mo)
     return nullptr;
-
   mlir::PatternRewriter rewriter(mo.getContext());
   return cir::createLowerModule(mo, rewriter);
 }
@@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   // Let's hope LLVM's va_arg instruction can take care of it.
   // Remove this when X86_64ABIInfo::classify can take care of every type.
   if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
-                 StructType>(op.getType()))
+                 StructType, LongDoubleType>(op.getType()))
     return nullptr;

   // Assume that va_list type is correct; should be pointer to LLVM type:
@@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
   if (!lowerModule)
     return nullptr;
-
   mlir::Type ty = op.getType();

   // FIXME: How should we access the X86AVXABILevel?
@@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   mlir::Block *contBlock = currentBlock->splitBlock(op);
   mlir::Block *inRegBlock = builder.createBlock(contBlock);
   mlir::Block *inMemBlock = builder.createBlock(contBlock);
-
   builder.setInsertionPointToEnd(currentBlock);
   builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp (+113, -3)

@@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
     Current = Class::SSE;
     return;

+  } else if (mlir::isa<LongDoubleType>(Ty)) {
+    const llvm::fltSemantics *LDF =
+        &getContext().getTargetInfo().getLongDoubleFormat();
+    if (LDF == &llvm::APFloat::IEEEquad()) {
+      Lo = Class::SSE;
+      Hi = Class::SSEUp;
+    } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
+      Lo = Class::X87;
+      Hi = Class::X87Up;
+    } else if (LDF == &llvm::APFloat::IEEEdouble()) {
+      Current = Class::SSE;
+    } else {
+      llvm_unreachable("unexpected long double representation!");
+    }
+    return;
   } else if (mlir::isa<BoolType>(Ty)) {
     Current = Class::Integer;
   } else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
@@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
   cir_cconv_unreachable("NYI");
 }

+ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
+                                            unsigned freeIntRegs) const {
+  // If this is a scalar LLVM value then assume LLVM will pass it in the right
+  // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
+  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
+      /*!Ty->isBitIntType()*/) {
+    // FIXME: Handling enum type?
+
+    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
+  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
+    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
+
+  // Compute the byval alignment. We specify the alignment of the byval in all
+  // cases so that the mid-level optimizer knows the alignment of the byval.
+  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of
+  // the free integer registers. Doing this when there are free integer
+  // registers would require more care, as we would have to ensure that the
+  // coerced value did not claim the unused register. That would require either
+  // reordering the arguments to the function (so that any subsequent inreg
+  // values came first), or only doing this optimization when there were no
+  // following arguments that might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being passed
+  // by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t size = getContext().getTypeSize(ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (align == 8 && size <= 64)
+      return ABIArgInfo::getDirect(
+          cir::IntType::get(LT.getMLIRContext(), size, false));
+  }
+
+  return ABIArgInfo::getIndirect(align);
+}
+
 /// Return a type that will be passed by the backend in the low 8 bytes of an
 /// XMM register, corresponding to the SSE class.
 mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
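The three-way branch above implements the SysV AMD64 classification of long double: IEEE quad fills a full SSE register pair (SSE + SSEUp), the x87 80-bit format classifies as X87 + X87Up (which classifyArgumentType below routes to memory), and targets where long double is plain double classify as SSE. A standalone sketch of the mapping (illustrative names, not the CIR API):

#include <cassert>

enum class Class { NoClass, Integer, SSE, SSEUp, X87, X87Up, Memory };
enum class LongDoubleFormat { IEEEquad, X87DoubleExtended, IEEEdouble };
struct Classification { Class lo, hi; };

Classification classifyLongDouble(LongDoubleFormat f) {
  switch (f) {
  case LongDoubleFormat::IEEEquad:          // e.g. 128-bit long double targets
    return {Class::SSE, Class::SSEUp};      // one full 16-byte SSE pair
  case LongDoubleFormat::X87DoubleExtended: // x86_64 Linux long double
    return {Class::X87, Class::X87Up};      // ends up passed in memory
  case LongDoubleFormat::IEEEdouble:        // long double == double
    return {Class::SSE, Class::NoClass};
  }
  assert(false && "unexpected long double representation");
  return {Class::NoClass, Class::NoClass};
}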
@@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || mlir::isa<mlir::Float64Type>(T0))
-    return T0; // NOTE(cir): Not sure if this is correct.
+    return cir::DoubleType::get(LT.getMLIRContext());

   mlir::Type T1 = {};
   unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -539,13 +613,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
     ++neededSSE;
     break;
   }
+  // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+  // on the stack.
+  case Class::Memory:
+
+  // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+  // COMPLEX_X87, it is passed in memory.
+  case Class::X87:
+  case Class::ComplexX87:
+    if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
+      ++neededInt;
+    return getIndirectResult(Ty, freeIntRegs);
+
+  case Class::SSEUp:
+  case Class::X87Up:
+    llvm_unreachable("Invalid classification for lo word.");
+
   default:
     cir_cconv_assert_or_abort(!cir::MissingFeatures::X86ArgTypeClassification(),
                               "NYI");
   }

   mlir::Type HighPart = {};
   switch (Hi) {
+  case Class::Memory:
+  case Class::X87:
+  case Class::ComplexX87:
+    llvm_unreachable("Invalid classification for hi word.");
+
   case Class::NoClass:
     break;

@@ -558,8 +653,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
     return ABIArgInfo::getDirect(HighPart, 8);
     break;

-  default:
-    cir_cconv_unreachable("NYI");
+  // X87Up generally doesn't occur here (long double is passed in
+  // memory), except in situations involving unions.
+  case Class::X87Up:
+  case Class::SSE:
+    ++neededSSE;
+    HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
+
+    if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
+      return ABIArgInfo::getDirect(HighPart, 8);
+    break;
+
+  // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+  // eightbyte is passed in the upper half of the last used SSE
+  // register. This only happens when 128-bit vectors are passed.
+  case Class::SSEUp:
+    llvm_unreachable("NYI && We need to implement GetByteVectorType");
+    break;
   }

   // If a high part was specified, merge it together with the low part. It is
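End to end, an x86_64 Linux long double argument now classifies as Lo = X87, Hi = X87Up and is routed through getIndirectResult, i.e. passed in memory rather than in registers. A hedged cross-check against regular Clang codegen (expected IR shape recalled from experience; worth re-verifying):

// clang++ -S -emit-llvm --target=x86_64-unknown-linux-gnu ld_abi.cpp
// Expected IR is approximately:
//   define x86_fp80 @_Z1ge(x86_fp80 noundef %x)
// The backend then places the x86_fp80 argument on the stack (16 bytes,
// 16-byte aligned), matching the X87/X87Up -> memory rule above.
long double g(long double x) { return x + 1.0L; }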

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h (+8)

@@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
                                mlir::Type SourceTy,
                                unsigned SourceOffset) const;

+  /// Given a source type \arg Ty, return a suitable result such that the
+  /// argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
+                                      unsigned freeIntRegs) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers. In particular, platforms which

clang/test/CIR/Lowering/var-arg-x86_64.c (+53)

@@ -1,3 +1,4 @@
+// REQUIRES: system-linux
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
@@ -76,3 +77,55 @@ double f1(int n, ...) {
 // CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
 // CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
 // CIR: store [[CASTED_ARG]], [[RES]]
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
+// Ptr Mask Operations
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
+// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
+// CHECK: [[DIFF:%.+]] = sub i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], [[MASKED]]
+// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
+
+// CIR: cir.func @f2
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!ty___va_list_tag x 1>, !cir.ptr<!cir.array<!ty___va_list_tag x 1>>, ["valist"]
+// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
+// CIR: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
+// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<15>
+// CIR: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
+// CIR: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
+// CIR: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
+// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
+// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
+// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
+// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
+// CIR: cir.va.end
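The "Ptr Mask Operations" block checks the usual round-up-to-16 sequence for the overflow area: advance by align minus one, then clear the low four bits. The same computation in plain integer arithmetic (a sketch, not the CIR builder API):

#include <cstdint>

// Round p up to the 16-byte boundary a long double slot requires,
// mirroring the checked "gep +15" / "and -16" pair.
std::uintptr_t alignUp16(std::uintptr_t p) {
  return (p + 15) & ~static_cast<std::uintptr_t>(15);
}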
