diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
index dc9bf64ba3d6..f318ea7b1fdc 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
@@ -1388,8 +1388,14 @@ cir::TypeEvaluationKind LowerFunction::getEvaluationKind(mlir::Type type) {
   // FIXME(cir): Implement type classes for CIR types.
   if (mlir::isa<RecordType>(type))
     return cir::TypeEvaluationKind::TEK_Aggregate;
+
+  if (mlir::isa<ComplexType>(type))
+    return cir::TypeEvaluationKind::TEK_Complex;
+
+  // Scalar types
-  if (mlir::isa<BoolType, IntType, SingleType, DoubleType>(type))
+  if (mlir::isa<BoolType, IntType, SingleType, DoubleType, LongDoubleType,
+                VectorType, PointerType, VoidType, FP16Type, FP80Type,
+                FP128Type, BF16Type>(type))
     return cir::TypeEvaluationKind::TEK_Scalar;
   cir_cconv_unreachable("NYI");
 }
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp
index 76d457f1607b..b333e7bcb53b 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp
@@ -11,10 +11,14 @@
 
 #include "LowerTypes.h"
 #include "TargetInfo.h"
 #include "TargetLoweringInfo.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "clang/Basic/AddressSpaces.h"
 #include "clang/CIR/ABIArgInfo.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -34,9 +38,19 @@ class AMDGPUABIInfo : public ABIInfo {
   AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}
 
 private:
-  void computeInfo(LowerFunctionInfo &fi) const override {
-    llvm_unreachable("NYI");
-  }
+  static const unsigned maxNumRegsForArgsRet = 16;
+
+  unsigned numRegsForType(mlir::Type ty) const;
+
+  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
+  mlir::Type coerceKernelArgumentType(mlir::Type ty, unsigned fromAS,
+                                      unsigned toAS) const;
+
+  ABIArgInfo classifyReturnType(mlir::Type ty) const;
+  ABIArgInfo classifyArgumentType(mlir::Type ty, bool variadic,
+                                  unsigned &numRegsLeft) const;
+  ABIArgInfo classifyKernelArgumentType(mlir::Type ty) const;
+  void computeInfo(LowerFunctionInfo &fi) const override;
 };
 
 class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
@@ -63,7 +77,129 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
   }
 };
 
+// Estimate the number of 32-bit registers needed to pass a value of this type.
+unsigned AMDGPUABIInfo::numRegsForType(mlir::Type ty) const {
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable("numRegsForType for aggregate types is NYI for AMDGPU");
+  }
+
+  uint64_t size = getContext().getTypeSize(ty);
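+  // One register for every started 32 bits of the value, so e.g. a 64-bit
+  // scalar counts as two registers.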
+  return (size + 31) / 32;
+}
+
+// Coerce HIP scalar pointer arguments from generic pointers to global ones.
+mlir::Type AMDGPUABIInfo::coerceKernelArgumentType(mlir::Type ty,
+                                                   unsigned fromAS,
+                                                   unsigned toAS) const {
+  if (auto ptrTy = mlir::dyn_cast<cir::PointerType>(ty)) {
+    mlir::Attribute addrSpaceAttr = ptrTy.getAddrSpace();
+    unsigned currentAS = 0;
+    // Get the current address space.
+    if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>(
+            addrSpaceAttr))
+      currentAS = targetAS.getValue();
+    // If currentAS matches fromAS, coerce the pointer to toAS.
+    if (currentAS == fromAS) {
+      auto newAddrSpaceAttr =
+          cir::TargetAddressSpaceAttr::get(ty.getContext(), toAS);
+      return cir::PointerType::get(ptrTy.getPointee(), newAddrSpaceAttr);
+    }
+  }
+  return ty;
+}
+
+ABIArgInfo AMDGPUABIInfo::classifyReturnType(mlir::Type ty) const {
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable(
+        "classifyReturnType for aggregate types is NYI for AMDGPU");
+  }
+
+  return isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                           : ABIArgInfo::getDirect();
+}
+
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(mlir::Type ty, bool variadic,
+                                               unsigned &numRegsLeft) const {
+  assert(numRegsLeft <= maxNumRegsForArgsRet && "register estimate underflow");
+
+  ty = useFirstFieldIfTransparentUnion(ty);
+
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable(
+        "classifyArgumentType for aggregate types is NYI for AMDGPU");
+  }
+
+  if (variadic) {
+    return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, 0);
+  }
+
+  ABIArgInfo argInfo =
+      (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                         : ABIArgInfo::getDirect());
+
+  // Track register usage for direct arguments.
+  if (!argInfo.isIndirect()) {
+    unsigned numRegs = numRegsForType(ty);
+    numRegsLeft -= std::min(numRegs, numRegsLeft);
+  }
+
+  return argInfo;
+}
+
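+// Kernel arguments (amdgpu_kernel calling convention) are classified
+// separately from ordinary device-function arguments: they are always passed
+// direct and are not counted against the numRegsLeft budget used by
+// classifyArgumentType.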
+ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(mlir::Type ty) const {
+  ty = useFirstFieldIfTransparentUnion(ty);
+
+  // Aggregate types are not yet supported.
+  if (isAggregateTypeForABI(ty)) {
+    llvm_unreachable("Aggregate types NYI for AMDGPU kernel arguments");
+  }
+
+  mlir::Type origTy = ty;
+  mlir::Type coercedTy = origTy;
+
+  // Determine whether this is a HIP compilation based on the triple.
+  // TODO: use getLangOpts().HIP instead.
+  const auto &triple = getTarget().getTriple();
+  bool isHIP = triple.getArch() == llvm::Triple::amdgcn &&
+               triple.getOS() == llvm::Triple::AMDHSA;
+
+  // For HIP, coerce pointer arguments from the generic to the global address
+  // space.
+  if (isHIP) {
+    unsigned genericAS =
+        getTarget().getTargetAddressSpace(clang::LangAS::Default);
+    unsigned globalAS =
+        getTarget().getTargetAddressSpace(clang::LangAS::cuda_device);
+    coercedTy = coerceKernelArgumentType(origTy, genericAS, globalAS);
+  }
+
+  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
+  // individual elements, which confuses the Clover OpenCL backend; therefore we
+  // have to set it to false here. Other args of getDirect() are just defaults.
+  return ABIArgInfo::getDirect(coercedTy, 0, nullptr, false);
+}
+
+void AMDGPUABIInfo::computeInfo(LowerFunctionInfo &fi) const {
+  const unsigned cc = fi.getCallingConvention();
+
+  if (!getCXXABI().classifyReturnType(fi))
+    fi.getReturnInfo() = classifyReturnType(fi.getReturnType());
+
+  unsigned argumentIndex = 0;
+  const unsigned numFixedArguments = fi.getNumRequiredArgs();
+
+  unsigned numRegsLeft = maxNumRegsForArgsRet;
+  for (auto &arg : fi.arguments()) {
+    if (cc == llvm::CallingConv::AMDGPU_KERNEL) {
+      arg.info = classifyKernelArgumentType(arg.type);
+    } else {
+      bool fixedArgument = argumentIndex++ < numFixedArguments;
+      arg.info = classifyArgumentType(arg.type, !fixedArgument, numRegsLeft);
+    }
+  }
+}
+
 } // namespace
+
 std::unique_ptr<TargetLoweringInfo>
 createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
   return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());
diff --git a/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip b/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
index 014b9da2b330..c429907dc076 100644
--- a/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
+++ b/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
@@ -2,27 +2,73 @@
 // RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
 // RUN:   -fcuda-is-device -fhip-new-launch-api \
-// RUN:   -I%S/../../Inputs/ -emit-cir %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN:   -I%S/../Inputs/ -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:   -fcuda-is-device -fhip-new-launch-api \
+// RUN:   -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:   -fcuda-is-device -fhip-new-launch-api \
+// RUN:   -I%S/../Inputs/ -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test ABI lowering from CIR to LLVM IR for AMDGPU
+//===----------------------------------------------------------------------===//
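+// CIR checks the emitted ClangIR, LLVM checks the LLVM IR produced through
+// ClangIR, and OGCG checks the LLVM IR from the original (non-CIR) CodeGen
+// path for comparison.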
 
 // Test simple kernel
-// CHECK: cir.func{{.*}} @_Z13simple_kerneli(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
-__global__ void simple_kernel(int x) {}
+// CIR: cir.func{{.*}} @_Z13simple_kernelv{{.*}} cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
+// OGCG: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
+__global__ void simple_kernel() {}
+
+// Test kernel with int argument
+// CIR: cir.func{{.*}} @_Z14kernel_int_argi(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32 %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32{{.*}} %{{.*}})
+__global__ void kernel_int_arg(int x) {}
+
+// Test kernel with char argument
+// CIR: cir.func{{.*}} @_Z15kernel_char_argc(%arg{{[0-9]+}}: !s8i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8 %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8{{.*}} %{{.*}})
+__global__ void kernel_char_arg(char c) {}
 
-// Test kernel with pointer
-// CHECK: cir.func{{.*}} @_Z15kernel_with_ptrPi(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
-__global__ void kernel_with_ptr(int *ptr) {}
+// Test kernel with pointer (HIP coerces generic pointers to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z14kernel_ptr_argPi(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_ptr_arg(int *ptr) {}
 
-// Test kernel with multiple args
-// CHECK: cir.func{{.*}} @_Z16kernel_multi_argifdPi(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.double{{.*}}, %arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
-__global__ void kernel_multi_arg(int a, float b, double c, int *d) {}
+// Test kernel with multiple args (pointer coerced to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z16kernel_multi_argifPf(%arg{{[0-9]+}}: !s32i{{.*}} !cir.float{{.*}} !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_multi_arg(int a, float b, float *c) {}
 
-// Test device function (NOT a kernel)
-// CHECK: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
-// CHECK-NOT: cc(amdgpu_kernel)
+// Test device function
+// CIR: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
+// LLVM: define{{.*}} void @_Z9device_fni(i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z9device_fni(i32{{.*}} %{{.*}})
 __device__ void device_fn(int x) {}
 
 // Test device function with return value
-// CHECK: cir.func{{.*}} @_Z13device_fn_reti(%arg{{[0-9]+}}: !s32i{{.*}}) -> !s32i
-// CHECK-NOT: cc(amdgpu_kernel)
-__device__ int device_fn_ret(int x) { return x + 1; }
+// CIR: cir.func{{.*}} @_Z15device_fn_floatf(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
+// LLVM: define{{.*}} float @_Z15device_fn_floatf(float %{{.*}})
+// OGCG: define{{.*}} float @_Z15device_fn_floatf(float{{.*}} %{{.*}})
+__device__ float device_fn_float(float f) { return f * 2.0f; }
+
+// Test kernel with pointer (coerced to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z17kernel_shared_ptrPi(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_shared_ptr(int *ptr) {}
+
+// Test variadic device function
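+// Arguments passed through the ellipsis are classified as Direct and skip the
+// integer-promotion check applied to fixed parameters (see
+// classifyArgumentType).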
+// CIR: cir.func{{.*}} @_Z11variadic_fniz(%arg{{[0-9]+}}: !s32i{{.*}}, ...)
+// LLVM: define{{.*}} void @_Z11variadic_fniz(i32 %{{.*}}, ...)
+// OGCG: define{{.*}} void @_Z11variadic_fniz(i32{{.*}} %{{.*}}, ...)
+__device__ void variadic_fn(int count, ...) {}
diff --git a/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl b/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl
deleted file mode 100644
index 6c94b1921422..000000000000
--- a/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: %clang_cc1 %s -fclangir -triple amdgcn-amd-amdhsa -emit-cir -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
-
-// Test kernel function with amdgpu_kernel calling convention
-// CHECK: cir.func{{.*}} @simple_kernel() cc(amdgpu_kernel)
-__kernel void simple_kernel() {}
-
-// Test kernel with simple integer argument
-// CHECK: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
-__kernel void kernel_with_int(int x) {}
-
-// Test kernel with pointer argument (should be in global address space)
-// CHECK: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
-__kernel void kernel_with_ptr(global int *ptr) {}
-
-// Test device function (should NOT have amdgpu_kernel calling convention)
-// CHECK: cir.func{{.*}} @device_fn
-// CHECK-NOT: cc(amdgpu_kernel)
-void device_fn(int x) {}
diff --git a/clang/test/CIR/CodeGen/OpenCL/amdgpu-kernel-abi.cl b/clang/test/CIR/CodeGen/OpenCL/amdgpu-kernel-abi.cl
new file mode 100644
index 000000000000..aee39944e4f8
--- /dev/null
+++ b/clang/test/CIR/CodeGen/OpenCL/amdgpu-kernel-abi.cl
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:   -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:   -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:   -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test ABI lowering from CIR to LLVM IR for AMDGPU OpenCL kernels
+//===----------------------------------------------------------------------===//
+
+// Test simple kernel
+// CIR: cir.func{{.*}} @simple_kernel{{.*}} cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @simple_kernel()
+// OGCG: define{{.*}} amdgpu_kernel void @simple_kernel()
+__kernel void simple_kernel() {}
+
+// Test kernel with int argument
+// CIR: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_int(i32 %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_int(i32{{.*}} %{{.*}})
+__kernel void kernel_with_int(int x) {}
+
+// Test kernel with pointer argument
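+// (a global int pointer already carries addrspace(1), so no generic-to-global
+// coercion is needed)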
+// CIR: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}} %{{.*}})
+__kernel void kernel_with_ptr(global int *ptr) {}
+
+// Test kernel with multiple args
+// CIR: cir.func{{.*}} @kernel_multi_arg(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
+__kernel void kernel_multi_arg(int a, float b, global float *c) {}
+
+// Test device function
+// CIR: cir.func{{.*}} @device_fn(%arg{{[0-9]+}}: !s32i{{.*}})
+// CIR-NOT: cc(amdgpu_kernel)
+// LLVM: define{{.*}} void @device_fn(i32 %{{.*}})
+// LLVM-NOT: amdgpu_kernel
+// OGCG: define{{.*}} void @device_fn(i32{{.*}} %{{.*}})
+// OGCG-NOT: amdgpu_kernel
+void device_fn(int x) {}
+
+// Test device function with return value
+// CIR: cir.func{{.*}} @device_fn_float(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
+// LLVM: define{{.*}} float @device_fn_float(float %{{.*}})
+// OGCG: define{{.*}} float @device_fn_float(float{{.*}} %{{.*}})
+float device_fn_float(float f) { return f * 2.0f; }
+
+// Test kernel with local address space pointer (addrspace 3)
+// CIR: cir.func{{.*}} @kernel_local_ptr(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}} %{{.*}})
+__kernel void kernel_local_ptr(local int *ptr) {}
+
+// Test kernel with constant address space pointer (addrspace 4)
+// CIR: cir.func{{.*}} @kernel_constant_ptr(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}} %{{.*}})
+__kernel void kernel_constant_ptr(constant int *ptr) {}