diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 66786698275d..d796d8340ff1 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4003,7 +4003,8 @@ def CIR_CallingConv : CIR_I32EnumAttr<"CallingConv", "calling convention", [ I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">, I32EnumAttrCase<"SpirFunction", 3, "spir_function">, I32EnumAttrCase<"OpenCLKernel", 4, "opencl_kernel">, - I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel"> + I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">, + I32EnumAttrCase<"AMDGPUKernel", 6, "amdgpu_kernel"> ]>; def CIR_OptionalPriorityAttr : OptionalAttr< diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 15374e767648..369ccb9a5401 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -1393,10 +1393,10 @@ CIRGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { if (MD->isInstance()) return arrangeCXXMethodDeclaration(MD); - auto FTy = FD->getType()->getCanonicalTypeUnqualified(); + CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified(); assert(isa(FTy)); - // TODO: setCUDAKernelCallingConvention + setCUDAKernelCallingConvention(FTy, CGM, FD); // When declaring a function without a prototype, always use a non-variadic // type. diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index a2974a9a6dbc..5c9be81f8212 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -378,8 +378,13 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique(cgt)) {} + cir::CallingConv getOpenCLKernelCallingConv() const override { + return cir::CallingConv::AMDGPUKernel; + } + void setCUDAKernelCallingConvention(const FunctionType *&ft) const override { - llvm_unreachable("NYI"); + ft = getABIInfo().getContext().adjustFunctionType( + ft, ft->getExtInfo().withCallingConv(CC_DeviceKernel)); } }; diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 1c9b36b8ef94..6a59d0006d38 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -901,6 +901,8 @@ mlir::LLVM::CConv convertCallingConv(cir::CallingConv callingConv) { llvm_unreachable("NYI"); case CIR::PTXKernel: return LLVM::PTX_Kernel; + case CIR::AMDGPUKernel: + return LLVM::AMDGPU_KERNEL; } llvm_unreachable("Unknown calling convention"); } diff --git a/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip b/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip new file mode 100644 index 000000000000..014b9da2b330 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/amdgpu-hip-kernel-abi.hip @@ -0,0 +1,28 @@ +#include "../Inputs/cuda.h" + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../../Inputs/ -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Test simple kernel +// CHECK: cir.func{{.*}} @_Z13simple_kerneli(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel) +__global__ void simple_kernel(int x) {} + +// Test kernel with pointer +// CHECK: cir.func{{.*}} @_Z15kernel_with_ptrPi(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel) +__global__ void kernel_with_ptr(int *ptr) {} + +// Test kernel with multiple args +// CHECK: cir.func{{.*}} @_Z16kernel_multi_argifdPi(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.double{{.*}}, %arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel) +__global__ void kernel_multi_arg(int a, float b, double c, int *d) {} + +// Test device function (NOT a kernel) +// CHECK: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}}) +// CHECK-NOT: cc(amdgpu_kernel) +__device__ void device_fn(int x) {} + +// Test device function with return value +// CHECK: cir.func{{.*}} @_Z13device_fn_reti(%arg{{[0-9]+}}: !s32i{{.*}}) -> !s32i +// CHECK-NOT: cc(amdgpu_kernel) +__device__ int device_fn_ret(int x) { return x + 1; } diff --git a/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl b/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl new file mode 100644 index 000000000000..6c94b1921422 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/amdgpu-kernel-calling-conv.cl @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -fclangir -triple amdgcn-amd-amdhsa -emit-cir -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Test kernel function with amdgpu_kernel calling convention +// CHECK: cir.func{{.*}} @simple_kernel() cc(amdgpu_kernel) +__kernel void simple_kernel() {} + +// Test kernel with simple integer argument +// CHECK: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel) +__kernel void kernel_with_int(int x) {} + +// Test kernel with pointer argument (should be in global address space) +// CHECK: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr{{.*}}) cc(amdgpu_kernel) +__kernel void kernel_with_ptr(global int *ptr) {} + +// Test device function (should NOT have amdgpu_kernel calling convention) +// CHECK: cir.func{{.*}} @device_fn +// CHECK-NOT: cc(amdgpu_kernel) +void device_fn(int x) {} diff --git a/clang/test/CIR/CodeGen/HIP/simple.cpp b/clang/test/CIR/CodeGen/HIP/simple.cpp index c1850417c69e..2b17c3ef396b 100644 --- a/clang/test/CIR/CodeGen/HIP/simple.cpp +++ b/clang/test/CIR/CodeGen/HIP/simple.cpp @@ -64,8 +64,8 @@ __device__ void device_fn(int* a, double b, float c) {} // CIR-DEVICE: cir.func {{.*}} @_Z9device_fnPidf __global__ void global_fn(int a) {} -// CIR-DEVICE: @_Z9global_fni -// LLVM-DEVICE: define dso_local void @_Z9global_fni +// CIR-DEVICE: @_Z9global_fni{{.*}} cc(amdgpu_kernel) +// LLVM-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni // OGCG-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni // CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])