Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1388,8 +1388,14 @@ cir::TypeEvaluationKind LowerFunction::getEvaluationKind(mlir::Type type) {
// FIXME(cir): Implement type classes for CIR types.
if (mlir::isa<RecordType>(type))
return cir::TypeEvaluationKind::TEK_Aggregate;

if (mlir::isa<ComplexType>(type))
return cir::TypeEvaluationKind::TEK_Complex;

// Scalar types
if (mlir::isa<BoolType, IntType, SingleType, DoubleType, LongDoubleType,
VectorType, PointerType>(type))
VectorType, PointerType, VoidType, FP16Type, FP80Type,
FP128Type, BF16Type>(type))
return cir::TypeEvaluationKind::TEK_Scalar;
cir_cconv_unreachable("NYI");
}
Expand Down
142 changes: 139 additions & 3 deletions clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
#include "LowerTypes.h"
#include "TargetInfo.h"
#include "TargetLoweringInfo.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/CIR/ABIArgInfo.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"

Expand All @@ -34,9 +38,19 @@ class AMDGPUABIInfo : public ABIInfo {
AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}

private:
void computeInfo(LowerFunctionInfo &fi) const override {
llvm_unreachable("NYI");
}
static const unsigned maxNumRegsForArgsRet = 16;

unsigned numRegsForType(mlir::Type ty) const;

// Coerce HIP scalar pointer arguments from generic pointers to global ones.
mlir::Type coerceKernelArgumentType(mlir::Type ty, unsigned fromAS,
unsigned toAS) const;

ABIArgInfo classifyReturnType(mlir::Type ty) const;
ABIArgInfo classifyArgumentType(mlir::Type ty, bool variadic,
unsigned &numRegsLeft) const;
ABIArgInfo classifyKernelArgumentType(mlir::Type ty) const;
void computeInfo(LowerFunctionInfo &fi) const override;
};

class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
Expand All @@ -63,7 +77,129 @@ class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
}
};

// Estimate the number of registers the type will use
unsigned AMDGPUABIInfo::numRegsForType(mlir::Type ty) const {
if (isAggregateTypeForABI(ty)) {
llvm_unreachable("numRegsForType for aggregate types is NYI for AMDGPU");
}

uint64_t size = getContext().getTypeSize(ty);
return (size + 31) / 32;
}

// Coerce HIP scalar pointer arguments from generic pointers to global ones.
mlir::Type AMDGPUABIInfo::coerceKernelArgumentType(mlir::Type ty,
unsigned fromAS,
unsigned toAS) const {
if (auto ptrTy = mlir::dyn_cast<cir::PointerType>(ty)) {
mlir::Attribute addrSpaceAttr = ptrTy.getAddrSpace();
unsigned currentAS = 0;
// Get the current address space.
if (auto targetAS = mlir::dyn_cast_if_present<cir::TargetAddressSpaceAttr>(
addrSpaceAttr))
currentAS = targetAS.getValue();
// If currentAS is same as the FromAS, coerce it to the ToAS.
if (currentAS == fromAS) {
auto newAddrSpaceAttr =
cir::TargetAddressSpaceAttr::get(ty.getContext(), toAS);
return cir::PointerType::get(ptrTy.getPointee(), newAddrSpaceAttr);
}
}
return ty;
}

ABIArgInfo AMDGPUABIInfo::classifyReturnType(mlir::Type ty) const {
if (isAggregateTypeForABI(ty)) {
llvm_unreachable(
"classifyReturnType for aggregate types is NYI for AMDGPU");
}

return isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
: ABIArgInfo::getDirect();
}

ABIArgInfo AMDGPUABIInfo::classifyArgumentType(mlir::Type ty, bool variadic,
unsigned &numRegsLeft) const {
assert(numRegsLeft <= maxNumRegsForArgsRet && "register estimate underflow");

ty = useFirstFieldIfTransparentUnion(ty);

if (isAggregateTypeForABI(ty)) {
llvm_unreachable(
"classifyArgumentType for aggregate types is NYI for AMDGPU");
}

if (variadic) {
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, 0);
}

ABIArgInfo argInfo =
(isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
: ABIArgInfo::getDirect());

// Track register usage
if (!argInfo.isIndirect()) {
unsigned numRegs = numRegsForType(ty);
numRegsLeft -= std::min(numRegs, numRegsLeft);
}

return argInfo;
}

ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(mlir::Type ty) const {
ty = useFirstFieldIfTransparentUnion(ty);

// Aggregate types are not yet supported
if (isAggregateTypeForABI(ty)) {
llvm_unreachable("Aggregate types NYI for AMDGPU kernel arguments");
}

mlir::Type origTy = ty;
mlir::Type coercedTy = origTy;

// Determine if the target is in HIP, based on the triple.
// TODO: use getLangOpts().HIP instead.
const auto &Triple = getTarget().getTriple();
bool isHIP = Triple.getArch() == llvm::Triple::amdgcn &&
Triple.getOS() == llvm::Triple::AMDHSA;

// For HIP, coerce pointer arguments from generic to global
if (isHIP) {
unsigned genericAS =
getTarget().getTargetAddressSpace(clang::LangAS::Default);
unsigned globalAS =
getTarget().getTargetAddressSpace(clang::LangAS::cuda_device);
coercedTy = coerceKernelArgumentType(origTy, genericAS, globalAS);
}

// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
return ABIArgInfo::getDirect(coercedTy, 0, nullptr, false);
}

void AMDGPUABIInfo::computeInfo(LowerFunctionInfo &fi) const {
const unsigned cc = fi.getCallingConvention();

if (!getCXXABI().classifyReturnType(fi))
fi.getReturnInfo() = classifyReturnType(fi.getReturnType());

unsigned argumentIndex = 0;
const unsigned numFixedArguments = fi.getNumRequiredArgs();

unsigned numRegsLeft = maxNumRegsForArgsRet;
for (auto &arg : fi.arguments()) {
if (cc == llvm::CallingConv::AMDGPU_KERNEL) {
arg.info = classifyKernelArgumentType(arg.type);
} else {
bool fixedArgument = argumentIndex++ < numFixedArguments;
arg.info = classifyArgumentType(arg.type, !fixedArgument, numRegsLeft);
}
}
}

} // namespace

std::unique_ptr<TargetLoweringInfo>
createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());
Expand Down
78 changes: 62 additions & 16 deletions clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,73 @@

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../../Inputs/ -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ogcg.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s

//===----------------------------------------------------------------------===//
// Test ABI lowering from CIR to LLVM IR for AMDGPU
//===----------------------------------------------------------------------===//

// Test simple kernel
// CHECK: cir.func{{.*}} @_Z13simple_kerneli(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
__global__ void simple_kernel(int x) {}
// CIR: cir.func{{.*}} @_Z13simple_kernelv{{.*}} cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
// OGCG: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
__global__ void simple_kernel() {}

// Test kernel with int argument
// CIR: cir.func{{.*}} @_Z14kernel_int_argi(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32 %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32{{.*}} %{{.*}})
__global__ void kernel_int_arg(int x) {}

// Test kernel with char argument
// CIR: cir.func{{.*}} @_Z15kernel_char_argc(%arg{{[0-9]+}}: !s8i{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8 %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8{{.*}} %{{.*}})
__global__ void kernel_char_arg(char c) {}

// Test kernel with pointer
// CHECK: cir.func{{.*}} @_Z15kernel_with_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
__global__ void kernel_with_ptr(int *ptr) {}
// Test kernel with pointer (HIP coerces generic pointers to global addrspace 1)
// CIR: cir.func{{.*}} @_Z14kernel_ptr_argPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1) %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1){{.*}} %{{.*}})
__global__ void kernel_ptr_arg(int *ptr) {}

// Test kernel with multiple args
// CHECK: cir.func{{.*}} @_Z16kernel_multi_argifdPi(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.double{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
__global__ void kernel_multi_arg(int a, float b, double c, int *d) {}
// Test kernel with multiple args (pointer coerced to global addrspace 1)
// CIR: cir.func{{.*}} @_Z16kernel_multi_argifPf(%arg{{[0-9]+}}: !s32i{{.*}} !cir.float{{.*}} !cir.ptr<!cir.float{{.*}}>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1) %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
__global__ void kernel_multi_arg(int a, float b, float *c) {}

// Test device function (NOT a kernel)
// CHECK: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
// CHECK-NOT: cc(amdgpu_kernel)
// Test device function
// CIR: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
// LLVM: define{{.*}} void @_Z9device_fni(i32 %{{.*}})
// OGCG: define{{.*}} void @_Z9device_fni(i32{{.*}} %{{.*}})
__device__ void device_fn(int x) {}

// Test device function with return value
// CHECK: cir.func{{.*}} @_Z13device_fn_reti(%arg{{[0-9]+}}: !s32i{{.*}}) -> !s32i
// CHECK-NOT: cc(amdgpu_kernel)
__device__ int device_fn_ret(int x) { return x + 1; }
// CIR: cir.func{{.*}} @_Z15device_fn_floatf(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
// LLVM: define{{.*}} float @_Z15device_fn_floatf(float %{{.*}})
// OGCG: define{{.*}} float @_Z15device_fn_floatf(float{{.*}} %{{.*}})
__device__ float device_fn_float(float f) { return f * 2.0f; }

// Test kernel with pointer (coerced to global addrspace 1)
// CIR: cir.func{{.*}} @_Z17kernel_shared_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1) %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1){{.*}} %{{.*}})
__global__ void kernel_shared_ptr(int *ptr) {}

// Test variadic device function
// CIR: cir.func{{.*}} @_Z11variadic_fniz(%arg{{[0-9]+}}: !s32i{{.*}}, ...)
// LLVM: define{{.*}} void @_Z11variadic_fniz(i32 %{{.*}}, ...)
// OGCG: define{{.*}} void @_Z11variadic_fniz(i32{{.*}} %{{.*}}, ...)
__device__ void variadic_fn(int count, ...) {}
19 changes: 0 additions & 19 deletions clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl

This file was deleted.

66 changes: 66 additions & 0 deletions clang/test/CIR/CodeGen/OpenCL/amdgpu-kernel-abi.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
// RUN: -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
// RUN: -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -emit-llvm %s -o %t.ogcg.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s

//===----------------------------------------------------------------------===//
// Test ABI lowering from CIR to LLVM IR for AMDGPU OpenCL kernels
//===----------------------------------------------------------------------===//

// Test simple kernel
// CIR: cir.func{{.*}} @simple_kernel{{.*}} cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @simple_kernel()
// OGCG: define{{.*}} amdgpu_kernel void @simple_kernel()
__kernel void simple_kernel() {}

// Test kernel with int argument
// CIR: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_int(i32 %{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_int(i32{{.*}} %{{.*}})
__kernel void kernel_with_int(int x) {}

// Test kernel with pointer argument
// CIR: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}}%{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}} %{{.*}})
__kernel void kernel_with_ptr(global int *ptr) {}

// Test kernel with multiple args
// CIR: cir.func{{.*}} @kernel_multi_arg(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!cir.float, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1){{.*}}%{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
__kernel void kernel_multi_arg(int a, float b, global float *c) {}

// Test device function
// CIR: cir.func{{.*}} @device_fn(%arg{{[0-9]+}}: !s32i{{.*}})
// CIR-NOT: cc(amdgpu_kernel)
// LLVM: define{{.*}} void @device_fn(i32 %{{.*}})
// LLVM-NOT: amdgpu_kernel
// OGCG: define{{.*}} void @device_fn(i32{{.*}} %{{.*}})
// OGCG-NOT: amdgpu_kernel
void device_fn(int x) {}

// Test device function with return value
// CIR: cir.func{{.*}} @device_fn_float(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
// LLVM: define{{.*}} float @device_fn_float(float %{{.*}})
// OGCG: define{{.*}} float @device_fn_float(float{{.*}} %{{.*}})
float device_fn_float(float f) { return f * 2.0f; }

// Test kernel with local address space pointer (addrspace 3)
// CIR: cir.func{{.*}} @kernel_local_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_local)>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}}%{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}} %{{.*}})
__kernel void kernel_local_ptr(local int *ptr) {}

// Test kernel with constant address space pointer (addrspace 4)
// CIR: cir.func{{.*}} @kernel_constant_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_constant)>{{.*}}) cc(amdgpu_kernel)
// LLVM: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}}%{{.*}})
// OGCG: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}} %{{.*}})
__kernel void kernel_constant_ptr(constant int *ptr) {}
Loading