From 133ed0bf81db872c50266027fe60c10b34311ae8 Mon Sep 17 00:00:00 2001 From: ranapratap55 Date: Sat, 20 Dec 2025 18:09:10 +0000 Subject: [PATCH] [CIR][AMDGPU] Adds lowering for amdgcn raw buffer load/store and atomics --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 67 ++++++-- clang/lib/CIR/CodeGen/CIRGenTypes.cpp | 8 +- .../builtins-amdgcn-raw-buffer-atomics.hip | 93 +++++++++++ .../HIP/builtins-amdgcn-raw-buffer.hip | 145 ++++++++++++++++++ .../builtins-amdgcn-raw-buffer-atomics.cl | 91 +++++++++++ .../OpenCL/builtins-amdgcn-raw-buffer.cl | 143 +++++++++++++++++ 6 files changed, 532 insertions(+), 15 deletions(-) create mode 100644 clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip create mode 100644 clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip create mode 100644 clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl create mode 100644 clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 137db2cc29fb..6277583d53a2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -769,7 +769,14 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128: { - llvm_unreachable("raw_buffer_store_* NYI"); + mlir::Type voidTy = cir::VoidType::get(builder.getContext()); + llvm::SmallVector Args; + for (unsigned I = 0; I < 5; ++I) + Args.push_back(emitScalarExpr(expr->getArg(I))); + auto CallOp = LLVMIntrinsicCallOp::create( + builder, getLoc(expr->getExprLoc()), + builder.getStringAttr("amdgcn.raw.ptr.buffer.store"), voidTy, Args); + return CallOp.getResult(); } case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: @@ -777,23 +784,55 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: { - llvm_unreachable("raw_buffer_load_* NYI"); - } - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: { - llvm_unreachable("raw_ptr_buffer_atomic_add_* NYI"); + mlir::Type retTy; + switch (builtinId) { + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: + retTy = builder.getUIntNTy(8); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: + retTy = builder.getUIntNTy(16); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: + retTy = builder.getUIntNTy(32); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: + retTy = cir::VectorType::get(builder.getUIntNTy(32), 2); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: + retTy = cir::VectorType::get(builder.getUIntNTy(32), 3); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: + retTy = cir::VectorType::get(builder.getUIntNTy(32), 4); + break; + } + return LLVMIntrinsicCallOp::create( + builder, getLoc(expr->getExprLoc()), + builder.getStringAttr("amdgcn.raw.ptr.buffer.load"), retTy, + {emitScalarExpr(expr->getArg(0)), + emitScalarExpr(expr->getArg(1)), + emitScalarExpr(expr->getArg(2)), + emitScalarExpr(expr->getArg(3))}) + .getResult(); } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32: + return emitBuiltinWithOneOverloadedType<5>( + expr, "amdgcn.raw.ptr.buffer.atomic.add") + .getScalarVal(); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: { - llvm_unreachable("raw_ptr_buffer_atomic_fadd_* NYI"); - } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16: + return emitBuiltinWithOneOverloadedType<5>( + expr, "amdgcn.raw.ptr.buffer.atomic.fadd") + .getScalarVal(); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64: { - llvm_unreachable("raw_ptr_buffer_atomic_fmin_* NYI"); - } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64: + return emitBuiltinWithOneOverloadedType<5>( + expr, "amdgcn.raw.ptr.buffer.atomic.fmin") + .getScalarVal(); case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32: - case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: { - llvm_unreachable("raw_ptr_buffer_atomic_fmax_* NYI"); - } + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: + return emitBuiltinWithOneOverloadedType<5>( + expr, "amdgcn.raw.ptr.buffer.atomic.fmax") + .getScalarVal(); case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: { llvm_unreachable("s_prefetch_data_* NYI"); } diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index 36bc1cfb24c7..424a9f5ab9be 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -589,7 +589,13 @@ mlir::Type CIRGenTypes::convertType(QualType T) { #include "clang/Basic/WebAssemblyReferenceTypes.def" #define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \ case BuiltinType::Id: \ - ResultType = Builder.getPointerTo(CGM.VoidTy); \ + if (AS == 0) { \ + ResultType = Builder.getPointerTo(CGM.VoidTy); \ + } else { \ + ResultType = Builder.getPointerTo( \ + CGM.VoidTy, \ + cir::TargetAddressSpaceAttr::get(&getMLIRContext(), AS)); \ + } \ break; #define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ case BuiltinType::Id: \ diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip new file mode 100644 index 000000000000..d80bcd02f9c1 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip @@ -0,0 +1,93 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx90a -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx90a -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx90a -fcuda-is-device \ +// RUN: -target-feature +atomic-fmin-fmax-global-f32 \ +// RUN: -target-feature +atomic-fmin-fmax-global-f64 \ +// RUN: -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test raw buffer atomic builtins +//===----------------------------------------------------------------------===// + +typedef _Float16 __attribute__((ext_vector_type(2))) float16x2_t; + +// CIR-LABEL: @_Z19test_atomic_add_i32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.add" {{.*}} : (!s32i, !cir.ptr, !s32i, !s32i, !s32i) -> !s32i +// LLVM-LABEL: define{{.*}} i32 @_Z19test_atomic_add_i32 +// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} i32 @_Z19test_atomic_add_i32 +// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ int test_atomic_add_i32(int x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z20test_atomic_fadd_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fadd_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fadd_f32 +// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ float test_atomic_fadd_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z22test_atomic_fadd_v2f16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} <2 x half> @_Z22test_atomic_fadd_v2f16 +// LLVM: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} <2 x half> @_Z22test_atomic_fadd_v2f16 +// OGCG: call {{.*}}<2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ float16x2_t test_atomic_fadd_v2f16(float16x2_t x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z20test_atomic_fmin_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fmin_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fmin_f32 +// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ float test_atomic_fmin_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z20test_atomic_fmin_f64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.double, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.double +// LLVM-LABEL: define{{.*}} double @_Z20test_atomic_fmin_f64 +// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} double @_Z20test_atomic_fmin_f64 +// OGCG: call {{.*}}double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ double test_atomic_fmin_f64(double x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z20test_atomic_fmax_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fmax_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fmax_f32 +// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ float test_atomic_fmax_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @_Z20test_atomic_fmax_f64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.double, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.double +// LLVM-LABEL: define{{.*}} double @_Z20test_atomic_fmax_f64 +// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} double @_Z20test_atomic_fmax_f64 +// OGCG: call {{.*}}double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +__device__ double test_atomic_fmax_f64(double x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip new file mode 100644 index 000000000000..016fdf75803c --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip @@ -0,0 +1,145 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu verde -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test raw buffer load/store builtins +//===----------------------------------------------------------------------===// + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned int v2u32 __attribute__((ext_vector_type(2))); +typedef unsigned int v3u32 __attribute__((ext_vector_type(3))); +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +// CIR-LABEL: @_Z24test_raw_buffer_store_b8 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u8i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_store_b8 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_store_b8 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b8(u8 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b8(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z25test_raw_buffer_store_b16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u16i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b16 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b16 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b16(u16 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b16(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z25test_raw_buffer_store_b32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u32i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b32 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b32 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b32(u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b32(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z25test_raw_buffer_store_b64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b64 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b64 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b64(v2u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b64(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z25test_raw_buffer_store_b96 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b96 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b96 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b96(v3u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b96(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z26test_raw_buffer_store_b128 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @_Z26test_raw_buffer_store_b128 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z26test_raw_buffer_store_b128 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_store_b128(v4u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b128(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z23test_raw_buffer_load_b8 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u8i +// LLVM-LABEL: define{{.*}} void @_Z23test_raw_buffer_load_b8 +// LLVM: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z23test_raw_buffer_load_b8 +// OGCG: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b8(u8* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b8(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z24test_raw_buffer_load_b16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u16i +// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b16 +// LLVM: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b16 +// OGCG: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b16(u16* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b16(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z24test_raw_buffer_load_b32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u32i +// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b32 +// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b32 +// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b32(u32* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b32(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z24test_raw_buffer_load_b64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b64 +// LLVM: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b64 +// OGCG: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b64(v2u32* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b64(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z24test_raw_buffer_load_b96 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b96 +// LLVM: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b96 +// OGCG: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b96(v3u32* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b96(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @_Z25test_raw_buffer_load_b128 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_load_b128 +// LLVM: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_load_b128 +// OGCG: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +__device__ void test_raw_buffer_load_b128(v4u32* out, __amdgpu_buffer_rsrc_t rsrc) { + *out = __builtin_amdgcn_raw_buffer_load_b128(rsrc, 0, 0, 0); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl new file mode 100644 index 000000000000..d170561a2cb1 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl @@ -0,0 +1,91 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx90a -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx90a -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx90a \ +// RUN: -target-feature +atomic-fmin-fmax-global-f32 \ +// RUN: -target-feature +atomic-fmin-fmax-global-f64 \ +// RUN: -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test raw buffer atomic builtins +//===----------------------------------------------------------------------===// + +typedef half __attribute__((ext_vector_type(2))) float16x2_t; + +// CIR-LABEL: @test_atomic_add_i32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.add" {{.*}} : (!s32i, !cir.ptr, !s32i, !s32i, !s32i) -> !s32i +// LLVM-LABEL: define{{.*}} i32 @test_atomic_add_i32 +// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} i32 @test_atomic_add_i32 +// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fadd_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @test_atomic_fadd_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @test_atomic_fadd_f32 +// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fadd_v2f16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} <2 x half> @test_atomic_fadd_v2f16 +// LLVM: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} <2 x half> @test_atomic_fadd_v2f16 +// OGCG: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fmin_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @test_atomic_fmin_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @test_atomic_fmin_f32 +// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +float test_atomic_fmin_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fmin_f64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.double, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.double +// LLVM-LABEL: define{{.*}} double @test_atomic_fmin_f64 +// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} double @test_atomic_fmin_f64 +// OGCG: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +double test_atomic_fmin_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fmax_f32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.float, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.float +// LLVM-LABEL: define{{.*}} float @test_atomic_fmax_f32 +// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} float @test_atomic_fmax_f32 +// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0); +} + +// CIR-LABEL: @test_atomic_fmax_f64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.double, !cir.ptr, !s32i, !s32i, !s32i) -> !cir.double +// LLVM-LABEL: define{{.*}} double @test_atomic_fmax_f64 +// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +// OGCG-LABEL: define{{.*}} double @test_atomic_fmax_f64 +// OGCG: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0) +double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) { + return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl new file mode 100644 index 000000000000..90fac2330522 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl @@ -0,0 +1,143 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu verde -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu verde -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu verde -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test raw buffer load/store builtins +//===----------------------------------------------------------------------===// + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned int v2u32 __attribute__((ext_vector_type(2))); +typedef unsigned int v3u32 __attribute__((ext_vector_type(3))); +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +// CIR-LABEL: @test_raw_buffer_store_b8 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u8i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b8 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b8 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b8(u8 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b8(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_store_b16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u16i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b16 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b16 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b16(u16 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b16(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_store_b32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u32i, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b32 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b32 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b32(u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b32(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_store_b64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b64 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b64 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b64(v2u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b64(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_store_b96 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b96 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b96 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b96(v3u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b96(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_store_b128 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector, !cir.ptr, !s32i, !s32i, !s32i) +// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b128 +// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b128 +// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +void test_raw_buffer_store_b128(v4u32 vdata, __amdgpu_buffer_rsrc_t rsrc) { + __builtin_amdgcn_raw_buffer_store_b128(vdata, rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b8 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u8i +// LLVM-LABEL: define{{.*}} i8 @test_raw_buffer_load_b8 +// LLVM: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} i8 @test_raw_buffer_load_b8 +// OGCG: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +u8 test_raw_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b8(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b16 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u16i +// LLVM-LABEL: define{{.*}} i16 @test_raw_buffer_load_b16 +// LLVM: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} i16 @test_raw_buffer_load_b16 +// OGCG: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +u16 test_raw_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b16(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b32 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !u32i +// LLVM-LABEL: define{{.*}} i32 @test_raw_buffer_load_b32 +// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} i32 @test_raw_buffer_load_b32 +// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +u32 test_raw_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b32(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b64 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} <2 x i32> @test_raw_buffer_load_b64 +// LLVM: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} <2 x i32> @test_raw_buffer_load_b64 +// OGCG: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +v2u32 test_raw_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b64(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b96 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} <3 x i32> @test_raw_buffer_load_b96 +// LLVM: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} <3 x i32> @test_raw_buffer_load_b96 +// OGCG: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +v3u32 test_raw_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b96(rsrc, 0, 0, 0); +} + +// CIR-LABEL: @test_raw_buffer_load_b128 +// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr, !s32i, !s32i, !s32i) -> !cir.vector +// LLVM-LABEL: define{{.*}} <4 x i32> @test_raw_buffer_load_b128 +// LLVM: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +// OGCG-LABEL: define{{.*}} <4 x i32> @test_raw_buffer_load_b128 +// OGCG: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0) +v4u32 test_raw_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_raw_buffer_load_b128(rsrc, 0, 0, 0); +}