 #include "LowerTypes.h"
 #include "TargetInfo.h"
 #include "TargetLoweringInfo.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "clang/Basic/AddressSpaces.h"
 #include "clang/CIR/ABIArgInfo.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
@@ -34,9 +37,19 @@ class AMDGPUABIInfo : public ABIInfo {
   AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}
 
 private:
-  void computeInfo(LowerFunctionInfo &fi) const override {
-    llvm_unreachable("NYI");
-  }
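+  // Upper bound on the 32-bit registers used for arguments and the return
+  // value combined.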
+  static const unsigned maxNumRegsForArgsRet = 16;
+
+  unsigned numRegsForType(mlir::Type ty) const;
+
+  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
+  mlir::Type coerceKernelArgumentType(mlir::Type ty, unsigned fromAS,
+                                      unsigned toAS) const;
+
+  ABIArgInfo classifyReturnType(mlir::Type ty) const;
+  ABIArgInfo classifyArgumentType(mlir::Type ty, bool variadic,
+                                  unsigned &numRegsLeft) const;
+  ABIArgInfo classifyKernelArgumentType(mlir::Type ty) const;
+  void computeInfo(LowerFunctionInfo &fi) const override;
 };
 
 class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
@@ -63,7 +76,123 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
   }
 };
 
+// Estimate the number of registers the type will use.
+unsigned AMDGPUABIInfo::numRegsForType(mlir::Type ty) const {
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable("numRegsForType for aggregate types is NYI for AMDGPU");
+  }
+
+  uint64_t size = getContext().getTypeSize(ty);
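+  // AMDGPU registers are 32 bits wide, so round the bit-size up.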
+  return (size + 31) / 32;
+}
+
+// Coerce HIP scalar pointer arguments from generic pointers to global ones.
+mlir::Type AMDGPUABIInfo::coerceKernelArgumentType(mlir::Type ty,
+                                                   unsigned fromAS,
+                                                   unsigned toAS) const {
+  if (auto ptrTy = mlir::dyn_cast<cir::PointerType>(ty)) {
+    mlir::Attribute addrSpaceAttr = ptrTy.getAddrSpace();
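+    // Treat a missing or non-target attribute as address space 0.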
+    unsigned currentAS = 0;
+    // Get the current address space.
+    if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>(
+            addrSpaceAttr))
+      currentAS = targetAS.getValue();
+    // If the current address space matches fromAS, coerce it to toAS.
+    if (currentAS == fromAS) {
+      auto newAddrSpaceAttr =
+          cir::TargetAddressSpaceAttr::get(ty.getContext(), toAS);
+      return cir::PointerType::get(ptrTy.getPointee(), newAddrSpaceAttr);
+    }
+  }
+  return ty;
+}
+
+ABIArgInfo AMDGPUABIInfo::classifyReturnType(mlir::Type ty) const {
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable(
+        "classifyReturnType for aggregate types is NYI for AMDGPU");
+  }
+
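+  // Promotable integers are extended to a full register; other scalars are
+  // returned directly.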
+  return isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                           : ABIArgInfo::getDirect();
+}
+
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(mlir::Type ty, bool variadic,
+                                               unsigned &numRegsLeft) const {
+  assert(numRegsLeft <= maxNumRegsForArgsRet && "register estimate underflow");
+
+  ty = useFirstFieldIfTransparentUnion(ty);
+
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable(
+        "classifyArgumentType for aggregate types is NYI for AMDGPU");
+  }
+
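+  // Variadic arguments are passed directly, with no promotion and no
+  // register-budget accounting.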
+  if (variadic) {
+    return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, 0);
+  }
+
+  ABIArgInfo argInfo =
+      (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                         : ABIArgInfo::getDirect());
+
+  // Track register usage.
+  if (!argInfo.isIndirect()) {
+    unsigned numRegs = numRegsForType(ty);
+    numRegsLeft -= std::min(numRegs, numRegsLeft);
+  }
+
+  return argInfo;
+}
+
+ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(mlir::Type ty) const {
+  ty = useFirstFieldIfTransparentUnion(ty);
+
+  // Aggregate types are not yet supported.
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable("Aggregate types NYI for AMDGPU kernel arguments");
+  }
+
+  mlir::Type origTy = ty;
+  mlir::Type coercedTy = origTy;
+
+  // For HIP, coerce pointer arguments from generic to global.
+  if (getContext().getLangOpts().HIP) {
+    unsigned genericAS =
+        getTarget().getTargetAddressSpace(clang::LangAS::Default);
+    unsigned globalAS =
+        getTarget().getTargetAddressSpace(clang::LangAS::cuda_device);
+    coercedTy = coerceKernelArgumentType(origTy, genericAS, globalAS);
+  }
+
+  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
+  // individual elements, which confuses the Clover OpenCL backend; therefore
+  // we have to set it to false here. Other args of getDirect() are just
+  // defaults.
+  return ABIArgInfo::getDirect(coercedTy, 0, nullptr, false);
+}
+
+void AMDGPUABIInfo::computeInfo(LowerFunctionInfo &fi) const {
+  const unsigned cc = fi.getCallingConvention();
+
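+  // Let the C++ ABI classify the return value first (e.g. indirect returns
+  // for non-trivial C++ types); otherwise apply the target rules.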
+  if (!getCXXABI().classifyReturnType(fi))
+    fi.getReturnInfo() = classifyReturnType(fi.getReturnType());
+
+  unsigned argumentIndex = 0;
+  const unsigned numFixedArguments = fi.getNumRequiredArgs();
+
+  unsigned numRegsLeft = maxNumRegsForArgsRet;
+  for (auto &arg : fi.arguments()) {
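+    // AMDGPU kernel entry points classify their arguments separately; other
+    // calling conventions use the regular rules.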
+    if (cc == static_cast<unsigned>(llvm::CallingConv::AMDGPU_KERNEL)) {
+      arg.info = classifyKernelArgumentType(arg.type);
+    } else {
+      bool fixedArgument = argumentIndex++ < numFixedArguments;
+      arg.info = classifyArgumentType(arg.type, !fixedArgument, numRegsLeft);
+    }
+  }
+}
+
 } // namespace
+
 std::unique_ptr<TargetLoweringInfo>
 createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
   return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());