Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -4003,7 +4003,8 @@ def CIR_CallingConv : CIR_I32EnumAttr<"CallingConv", "calling convention", [
I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">,
I32EnumAttrCase<"SpirFunction", 3, "spir_function">,
I32EnumAttrCase<"OpenCLKernel", 4, "opencl_kernel">,
I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">
I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">,
I32EnumAttrCase<"AMDGPUKernel", 6, "amdgpu_kernel">
]>;

def CIR_OptionalPriorityAttr : OptionalAttr<
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1393,10 +1393,10 @@ CIRGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
if (MD->isInstance())
return arrangeCXXMethodDeclaration(MD);

auto FTy = FD->getType()->getCanonicalTypeUnqualified();
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();

assert(isa<FunctionType>(FTy));
// TODO: setCUDAKernelCallingConvention
setCUDAKernelCallingConvention(FTy, CGM, FD);

// When declaring a function without a prototype, always use a non-variadic
// type.
Expand Down
7 changes: 6 additions & 1 deletion clang/lib/CIR/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,13 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo {
AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt)
: TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {}

cir::CallingConv getOpenCLKernelCallingConv() const override {
return cir::CallingConv::AMDGPUKernel;
}

void setCUDAKernelCallingConvention(const FunctionType *&ft) const override {
llvm_unreachable("NYI");
ft = getABIInfo().getContext().adjustFunctionType(
ft, ft->getExtInfo().withCallingConv(CC_DeviceKernel));
}
};

Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,8 @@ mlir::LLVM::CConv convertCallingConv(cir::CallingConv callingConv) {
llvm_unreachable("NYI");
case CIR::PTXKernel:
return LLVM::PTX_Kernel;
case CIR::AMDGPUKernel:
return LLVM::AMDGPU_KERNEL;
}
llvm_unreachable("Unknown calling convention");
}
Expand Down
28 changes: 28 additions & 0 deletions clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "../Inputs/cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../../Inputs/ -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s

// Test simple kernel
// CHECK: cir.func{{.*}} @_Z13simple_kerneli(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
__global__ void simple_kernel(int x) {}

// Test kernel with pointer
// CHECK: cir.func{{.*}} @_Z15kernel_with_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
__global__ void kernel_with_ptr(int *ptr) {}

// Test kernel with multiple args
// CHECK: cir.func{{.*}} @_Z16kernel_multi_argifdPi(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.double{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
__global__ void kernel_multi_arg(int a, float b, double c, int *d) {}

// Test device function (NOT a kernel)
// CHECK: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
// CHECK-NOT: cc(amdgpu_kernel)
__device__ void device_fn(int x) {}

// Test device function with return value
// CHECK: cir.func{{.*}} @_Z13device_fn_reti(%arg{{[0-9]+}}: !s32i{{.*}}) -> !s32i
// CHECK-NOT: cc(amdgpu_kernel)
__device__ int device_fn_ret(int x) { return x + 1; }
19 changes: 19 additions & 0 deletions clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: %clang_cc1 %s -fclangir -triple amdgcn-amd-amdhsa -emit-cir -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s

// Test kernel function with amdgpu_kernel calling convention
// CHECK: cir.func{{.*}} @simple_kernel() cc(amdgpu_kernel)
__kernel void simple_kernel() {}

// Test kernel with simple integer argument
// CHECK: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
__kernel void kernel_with_int(int x) {}

// Test kernel with pointer argument (should be in global address space)
// CHECK: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
__kernel void kernel_with_ptr(global int *ptr) {}

// Test device function (should NOT have amdgpu_kernel calling convention)
// CHECK: cir.func{{.*}} @device_fn
// CHECK-NOT: cc(amdgpu_kernel)
void device_fn(int x) {}
4 changes: 2 additions & 2 deletions clang/test/CIR/CodeGen/HIP/simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ __device__ void device_fn(int* a, double b, float c) {}
// CIR-DEVICE: cir.func {{.*}} @_Z9device_fnPidf

__global__ void global_fn(int a) {}
// CIR-DEVICE: @_Z9global_fni
// LLVM-DEVICE: define dso_local void @_Z9global_fni
// CIR-DEVICE: @_Z9global_fni{{.*}} cc(amdgpu_kernel)
// LLVM-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni
// OGCG-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni

// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
Expand Down