Skip to content

Commit 4f4637c

Browse files
committed
[CIR][AMDGPU] Add basic ABI implementation of AMDGPU for CIR->LLVMIR
1 parent 5237bd4 commit 4f4637c

File tree

4 files changed

+260
-38
lines changed

4 files changed

+260
-38
lines changed

clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp

Lines changed: 132 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
#include "LowerTypes.h"
1212
#include "TargetInfo.h"
1313
#include "TargetLoweringInfo.h"
14+
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
15+
#include "clang/Basic/AddressSpaces.h"
1416
#include "clang/CIR/ABIArgInfo.h"
17+
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
1518
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
1619
#include "clang/CIR/Dialect/IR/CIRTypes.h"
1720
#include "clang/CIR/MissingFeatures.h"
@@ -34,9 +37,19 @@ class AMDGPUABIInfo : public ABIInfo {
3437
AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}
3538

3639
private:
37-
void computeInfo(LowerFunctionInfo &fi) const override {
38-
llvm_unreachable("NYI");
39-
}
40+
static const unsigned maxNumRegsForArgsRet = 16;
41+
42+
unsigned numRegsForType(mlir::Type ty) const;
43+
44+
// Coerce HIP scalar pointer arguments from generic pointers to global ones.
45+
mlir::Type coerceKernelArgumentType(mlir::Type ty, unsigned fromAS,
46+
unsigned toAS) const;
47+
48+
ABIArgInfo classifyReturnType(mlir::Type ty) const;
49+
ABIArgInfo classifyArgumentType(mlir::Type ty, bool variadic,
50+
unsigned &numRegsLeft) const;
51+
ABIArgInfo classifyKernelArgumentType(mlir::Type ty) const;
52+
void computeInfo(LowerFunctionInfo &fi) const override;
4053
};
4154

4255
class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
@@ -63,7 +76,123 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
6376
}
6477
};
6578

79+
// Estimate the number of registers the type will use
80+
unsigned AMDGPUABIInfo::numRegsForType(mlir::Type ty) const {
81+
if (isAggregateTypeForABI(ty)) {
82+
llvm_unreachable("numRegsForType for aggregate types is NYI for AMDGPU");
83+
}
84+
85+
uint64_t size = getContext().getTypeSize(ty);
86+
return (size + 31) / 32;
87+
}
88+
89+
// Coerce HIP scalar pointer arguments from generic pointers to global ones.
90+
mlir::Type AMDGPUABIInfo::coerceKernelArgumentType(mlir::Type ty,
91+
unsigned fromAS,
92+
unsigned toAS) const {
93+
if (auto ptrTy = mlir::dyn_cast<cir::PointerType>(ty)) {
94+
mlir::Attribute addrSpaceAttr = ptrTy.getAddrSpace();
95+
unsigned currentAS = 0;
96+
// Get the current address space.
97+
if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>(
98+
addrSpaceAttr))
99+
currentAS = targetAS.getValue();
100+
// If currentAS is same as the FromAS, coerce it to the ToAS.
101+
if (currentAS == fromAS) {
102+
auto newAddrSpaceAttr =
103+
cir::TargetAddressSpaceAttr::get(ty.getContext(), toAS);
104+
return cir::PointerType::get(ptrTy.getPointee(), newAddrSpaceAttr);
105+
}
106+
}
107+
return ty;
108+
}
109+
110+
ABIArgInfo AMDGPUABIInfo::classifyReturnType(mlir::Type ty) const {
111+
if (isAggregateTypeForABI(ty)) {
112+
llvm_unreachable(
113+
"classifyReturnType for aggregate types is NYI for AMDGPU");
114+
}
115+
116+
return isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
117+
: ABIArgInfo::getDirect();
118+
}
119+
120+
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(mlir::Type ty, bool variadic,
121+
unsigned &numRegsLeft) const {
122+
assert(numRegsLeft <= maxNumRegsForArgsRet && "register estimate underflow");
123+
124+
ty = useFirstFieldIfTransparentUnion(ty);
125+
126+
if (isAggregateTypeForABI(ty)) {
127+
llvm_unreachable(
128+
"classifyArgumentType for aggregate types is NYI for AMDGPU");
129+
}
130+
131+
if (variadic) {
132+
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, 0);
133+
}
134+
135+
ABIArgInfo argInfo =
136+
(isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
137+
: ABIArgInfo::getDirect());
138+
139+
// Track register usage
140+
if (!argInfo.isIndirect()) {
141+
unsigned numRegs = numRegsForType(ty);
142+
numRegsLeft -= std::min(numRegs, numRegsLeft);
143+
}
144+
145+
return argInfo;
146+
}
147+
148+
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(mlir::Type ty) const {
149+
ty = useFirstFieldIfTransparentUnion(ty);
150+
151+
// Aggregate types are not yet supported
152+
if (isAggregateTypeForABI(ty)) {
153+
llvm_unreachable("Aggregate types NYI for AMDGPU kernel arguments");
154+
}
155+
156+
mlir::Type origTy = ty;
157+
mlir::Type coercedTy = origTy;
158+
159+
// For HIP, coerce pointer arguments from generic to global
160+
if (getContext().getLangOpts().HIP) {
161+
unsigned genericAS =
162+
getTarget().getTargetAddressSpace(clang::LangAS::Default);
163+
unsigned globalAS =
164+
getTarget().getTargetAddressSpace(clang::LangAS::cuda_device);
165+
coercedTy = coerceKernelArgumentType(origTy, genericAS, globalAS);
166+
}
167+
168+
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
169+
// individual elements, which confuses the Clover OpenCL backend; therefore we
170+
// have to set it to false here. Other args of getDirect() are just defaults.
171+
return ABIArgInfo::getDirect(coercedTy, 0, nullptr, false);
172+
}
173+
174+
void AMDGPUABIInfo::computeInfo(LowerFunctionInfo &fi) const {
175+
const unsigned cc = fi.getCallingConvention();
176+
177+
if (!getCXXABI().classifyReturnType(fi))
178+
fi.getReturnInfo() = classifyReturnType(fi.getReturnType());
179+
180+
unsigned argumentIndex = 0;
181+
const unsigned numFixedArguments = fi.getNumRequiredArgs();
182+
183+
unsigned numRegsLeft = maxNumRegsForArgsRet;
184+
for (auto &arg : fi.arguments()) {
185+
if (cc == static_cast<unsigned>(llvm::CallingConv::AMDGPU_KERNEL)) {
186+
arg.info = classifyKernelArgumentType(arg.type);
187+
} else {
188+
bool fixedArgument = argumentIndex++ < numFixedArguments;
189+
arg.info = classifyArgumentType(arg.type, !fixedArgument, numRegsLeft);
190+
}
191+
}
192+
}
193+
66194
} // namespace
195+
67196
std::unique_ptr<TargetLoweringInfo>
68197
createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
69198
return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());

clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,73 @@
22

33
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
44
// RUN: -fcuda-is-device -fhip-new-launch-api \
5-
// RUN: -I%S/../../Inputs/ -emit-cir %s -o %t.cir
6-
// RUN: FileCheck --input-file=%t.cir %s
5+
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
6+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
7+
8+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
9+
// RUN: -fcuda-is-device -fhip-new-launch-api \
10+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
11+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
12+
13+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
14+
// RUN: -fcuda-is-device -fhip-new-launch-api \
15+
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ogcg.ll
16+
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
17+
18+
//===----------------------------------------------------------------------===//
19+
// Test ABI lowering from CIR to LLVM IR for AMDGPU
20+
//===----------------------------------------------------------------------===//
721

822
// Test simple kernel
9-
// CHECK: cir.func{{.*}} @_Z13simple_kerneli(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
10-
__global__ void simple_kernel(int x) {}
23+
// CIR: cir.func{{.*}} @_Z13simple_kernelv{{.*}} cc(amdgpu_kernel)
24+
// LLVM: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
25+
// OGCG: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
26+
__global__ void simple_kernel() {}
27+
28+
// Test kernel with int argument
29+
// CIR: cir.func{{.*}} @_Z14kernel_int_argi(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
30+
// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32 %{{.*}})
31+
// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32{{.*}} %{{.*}})
32+
__global__ void kernel_int_arg(int x) {}
33+
34+
// Test kernel with char argument
35+
// CIR: cir.func{{.*}} @_Z15kernel_char_argc(%arg{{[0-9]+}}: !s8i{{.*}}) cc(amdgpu_kernel)
36+
// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8 %{{.*}})
37+
// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8{{.*}} %{{.*}})
38+
__global__ void kernel_char_arg(char c) {}
1139

12-
// Test kernel with pointer
13-
// CHECK: cir.func{{.*}} @_Z15kernel_with_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
14-
__global__ void kernel_with_ptr(int *ptr) {}
40+
// Test kernel with pointer (HIP coerces generic pointers to global addrspace 1)
41+
// CIR: cir.func{{.*}} @_Z14kernel_ptr_argPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
42+
// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1) %{{.*}})
43+
// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1){{.*}} %{{.*}})
44+
__global__ void kernel_ptr_arg(int *ptr) {}
1545

16-
// Test kernel with multiple args
17-
// CHECK: cir.func{{.*}} @_Z16kernel_multi_argifdPi(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.double{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
18-
__global__ void kernel_multi_arg(int a, float b, double c, int *d) {}
46+
// Test kernel with multiple args (pointer coerced to global addrspace 1)
47+
// CIR: cir.func{{.*}} @_Z16kernel_multi_argifPf(%arg{{[0-9]+}}: !s32i{{.*}} !cir.float{{.*}} !cir.ptr<!cir.float{{.*}}>{{.*}}) cc(amdgpu_kernel)
48+
// LLVM: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1) %{{.*}})
49+
// OGCG: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
50+
__global__ void kernel_multi_arg(int a, float b, float *c) {}
1951

20-
// Test device function (NOT a kernel)
21-
// CHECK: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
22-
// CHECK-NOT: cc(amdgpu_kernel)
52+
// Test device function
53+
// CIR: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
54+
// LLVM: define{{.*}} void @_Z9device_fni(i32 %{{.*}})
55+
// OGCG: define{{.*}} void @_Z9device_fni(i32{{.*}} %{{.*}})
2356
__device__ void device_fn(int x) {}
2457

2558
// Test device function with return value
26-
// CHECK: cir.func{{.*}} @_Z13device_fn_reti(%arg{{[0-9]+}}: !s32i{{.*}}) -> !s32i
27-
// CHECK-NOT: cc(amdgpu_kernel)
28-
__device__ int device_fn_ret(int x) { return x + 1; }
59+
// CIR: cir.func{{.*}} @_Z15device_fn_floatf(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
60+
// LLVM: define{{.*}} float @_Z15device_fn_floatf(float %{{.*}})
61+
// OGCG: define{{.*}} float @_Z15device_fn_floatf(float{{.*}} %{{.*}})
62+
__device__ float device_fn_float(float f) { return f * 2.0f; }
63+
64+
// Test kernel with pointer (coerced to global addrspace 1)
65+
// CIR: cir.func{{.*}} @_Z17kernel_shared_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
66+
// LLVM: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1) %{{.*}})
67+
// OGCG: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1){{.*}} %{{.*}})
68+
__global__ void kernel_shared_ptr(int *ptr) {}
69+
70+
// Test variadic device function
71+
// CIR: cir.func{{.*}} @_Z11variadic_fniz(%arg{{[0-9]+}}: !s32i{{.*}}, ...)
72+
// LLVM: define{{.*}} void @_Z11variadic_fniz(i32 %{{.*}}, ...)
73+
// OGCG: define{{.*}} void @_Z11variadic_fniz(i32{{.*}} %{{.*}}, ...)
74+
__device__ void variadic_fn(int count, ...) {}

clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl

Lines changed: 0 additions & 19 deletions
This file was deleted.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
2+
// RUN: -emit-cir %s -o %t.cir
3+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
4+
5+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
6+
// RUN: -emit-llvm %s -o %t.ll
7+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
8+
9+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
10+
// RUN: -emit-llvm %s -o %t.ogcg.ll
11+
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
12+
13+
//===----------------------------------------------------------------------===//
14+
// Test ABI lowering from CIR to LLVM IR for AMDGPU OpenCL kernels
15+
//===----------------------------------------------------------------------===//
16+
17+
// Test simple kernel
18+
// CIR: cir.func{{.*}} @simple_kernel{{.*}} cc(amdgpu_kernel)
19+
// LLVM: define{{.*}} amdgpu_kernel void @simple_kernel()
20+
// OGCG: define{{.*}} amdgpu_kernel void @simple_kernel()
21+
__kernel void simple_kernel() {}
22+
23+
// Test kernel with int argument
24+
// CIR: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
25+
// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_int(i32 %{{.*}})
26+
// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_int(i32{{.*}} %{{.*}})
27+
__kernel void kernel_with_int(int x) {}
28+
29+
// Test kernel with pointer argument
30+
// CIR: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
31+
// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}}%{{.*}})
32+
// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}} %{{.*}})
33+
__kernel void kernel_with_ptr(global int *ptr) {}
34+
35+
// Test kernel with multiple args
36+
// CIR: cir.func{{.*}} @kernel_multi_arg(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!cir.float, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
37+
// LLVM: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1){{.*}}%{{.*}})
38+
// OGCG: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
39+
__kernel void kernel_multi_arg(int a, float b, global float *c) {}
40+
41+
// Test device function
42+
// CIR: cir.func{{.*}} @device_fn(%arg{{[0-9]+}}: !s32i{{.*}})
43+
// CIR-NOT: cc(amdgpu_kernel)
44+
// LLVM: define{{.*}} void @device_fn(i32 %{{.*}})
45+
// LLVM-NOT: amdgpu_kernel
46+
// OGCG: define{{.*}} void @device_fn(i32{{.*}} %{{.*}})
47+
// OGCG-NOT: amdgpu_kernel
48+
void device_fn(int x) {}
49+
50+
// Test device function with return value
51+
// CIR: cir.func{{.*}} @device_fn_float(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
52+
// LLVM: define{{.*}} float @device_fn_float(float %{{.*}})
53+
// OGCG: define{{.*}} float @device_fn_float(float{{.*}} %{{.*}})
54+
float device_fn_float(float f) { return f * 2.0f; }
55+
56+
// Test kernel with local address space pointer (addrspace 3)
57+
// CIR: cir.func{{.*}} @kernel_local_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_local)>{{.*}}) cc(amdgpu_kernel)
58+
// LLVM: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}}%{{.*}})
59+
// OGCG: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}} %{{.*}})
60+
__kernel void kernel_local_ptr(local int *ptr) {}
61+
62+
// Test kernel with constant address space pointer (addrspace 4)
63+
// CIR: cir.func{{.*}} @kernel_constant_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_constant)>{{.*}}) cc(amdgpu_kernel)
64+
// LLVM: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}}%{{.*}})
65+
// OGCG: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}} %{{.*}})
66+
__kernel void kernel_constant_ptr(constant int *ptr) {}

0 commit comments

Comments
 (0)