diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index d6669230162c..03da63ede4bc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -333,11 +333,13 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: case AMDGPU::BI__builtin_amdgcn_rsq_bf16: { - llvm_unreachable("rsq_* NYI"); + return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.rsq") + .getScalarVal(); } case AMDGPU::BI__builtin_amdgcn_rsq_clamp: case AMDGPU::BI__builtin_amdgcn_rsq_clampf: { - llvm_unreachable("rsq_clamp_* NYI"); + return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.rsq.clamp") + .getScalarVal(); } case AMDGPU::BI__builtin_amdgcn_sinf: case AMDGPU::BI__builtin_amdgcn_sinh: diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx1250.hip index 2c2714220f2f..5997d64c61de 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx1250.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx1250.hip @@ -38,3 +38,14 @@ __device__ void test_sqrt_bf16(__bf16* out, __bf16 a) { *out = __builtin_amdgcn_sqrt_bf16(a); } + +// CIR-LABEL: @_Z13test_rsq_bf16PDF16bDF16b +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @_Z13test_rsq_bf16PDF16bDF16b +// LLVM: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}}) +// OGCG: define{{.*}} void @_Z13test_rsq_bf16PDF16bDF16b +// OGCG: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}}) +__device__ void test_rsq_bf16(__bf16* out, __bf16 a) +{ + *out = __builtin_amdgcn_rsq_bf16(a); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip index 1f8bd39c0e0d..c82ed6a3fca2 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-vi.hip @@ -87,3 +87,14 @@ __device__ void test_sqrt_f16(_Float16* out, _Float16 a) { *out = __builtin_amdgcn_sqrth(a); } + +// CIR-LABEL: @_Z10test_rsq_hPDF16_DF16_ +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @_Z10test_rsq_hPDF16_DF16_ +// LLVM: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}}) +// OGCG: define{{.*}} void @_Z10test_rsq_hPDF16_DF16_ +// OGCG: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}}) +__device__ void test_rsq_h(_Float16* out, _Float16 a) +{ + *out = __builtin_amdgcn_rsqh(a); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip index 1745b8c1f9cf..47d103631b27 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip @@ -385,3 +385,45 @@ __device__ void test_sqrt_f32(float* out, float a) { __device__ void test_sqrt_f64(double* out, double a) { *out = __builtin_amdgcn_sqrt(a); } + +// CIR-LABEL: @_Z12test_rsq_f32Pff +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z12test_rsq_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}}) +// OGCG: define{{.*}} void @_Z12test_rsq_f32Pff +// OGCG: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}}) +__device__ void test_rsq_f32(float* out, float a) +{ + *out = __builtin_amdgcn_rsqf(a); +} + +// CIR-LABEL: @_Z12test_rsq_f64Pdd +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z12test_rsq_f64Pdd +// LLVM: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}}) +// OGCG: define{{.*}} void @_Z12test_rsq_f64Pdd +// OGCG: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}}) +__device__ void test_rsq_f64(double* out, double a) { + *out = __builtin_amdgcn_rsq(a); +} + +// CIR-LABEL: @_Z18test_rsq_clamp_f32Pff +// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @_Z18test_rsq_clamp_f32Pff +// LLVM: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}}) +// OGCG: define{{.*}} void @_Z18test_rsq_clamp_f32Pff +// OGCG: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}}) +__device__ void test_rsq_clamp_f32(float* out, float a) +{ + *out = __builtin_amdgcn_rsq_clampf(a); +} + +// CIR-LABEL: @_Z18test_rsq_clamp_f64Pdd +// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @_Z18test_rsq_clamp_f64Pdd +// LLVM: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}}) +// OGCG: define{{.*}} void @_Z18test_rsq_clamp_f64Pdd +// OGCG: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}}) +__device__ void test_rsq_clamp_f64(double* out, double a) { + *out = __builtin_amdgcn_rsq_clamp(a); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl index 7d72125cafd0..de932525ccdf 100644 --- a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl @@ -38,3 +38,14 @@ void test_sqrt_bf16(global __bf16* out, __bf16 a) { *out = __builtin_amdgcn_sqrt_bf16(a); } + +// CIR-LABEL: @test_rsq_bf16 +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.bf16) -> !cir.bf16 +// LLVM: define{{.*}} void @test_rsq_bf16 +// LLVM: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_bf16 +// OGCG: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}}) +void test_rsq_bf16(__bf16* out, __bf16 a) +{ + *out = __builtin_amdgcn_rsq_bf16(a); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl index 021fe0050a11..e7479a24edfe 100644 --- a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-vi.cl @@ -87,3 +87,14 @@ void test_sqrt_f16(global half* out, half a) { *out = __builtin_amdgcn_sqrth(a); } + +// CIR-LABEL: @test_rsq_f16 +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.f16) -> !cir.f16 +// LLVM: define{{.*}} void @test_rsq_f16 +// LLVM: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_f16 +// OGCG: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}}) +void test_rsq_f16(global half* out, half a) +{ + *out = __builtin_amdgcn_rsqh(a); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl index feda227849c5..4bf6ffe95928 100644 --- a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl +++ b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl @@ -400,3 +400,43 @@ void test_sqrt_f32(global float* out, float a) { void test_sqrt_f64(global double* out, double a) { *out = __builtin_amdgcn_sqrt(a); } + +// CIR-LABEL: @test_rsq_f32 +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @test_rsq_f32 +// LLVM: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_f32 +// OGCG: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}}) +void test_rsq_f32(global float* out, float a) { + *out = __builtin_amdgcn_rsqf(a); +} + +// CIR-LABEL: @test_rsq_f64 +// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @test_rsq_f64 +// LLVM: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_f64 +// OGCG: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}}) +void test_rsq_f64(global double* out, double a) { + *out = __builtin_amdgcn_rsq(a); +} + +// CIR-LABEL: @test_rsq_clamp_f32 +// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.float) -> !cir.float +// LLVM: define{{.*}} void @test_rsq_clamp_f32 +// LLVM: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_clamp_f32 +// OGCG: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}}) +void test_rsq_clamp_f32(global float* out, float a) { + *out = __builtin_amdgcn_rsq_clampf(a); +} + +// CIR-LABEL: @test_rsq_clamp_f64 +// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.double) -> !cir.double +// LLVM: define{{.*}} void @test_rsq_clamp_f64 +// LLVM: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}}) +// OGCG: define{{.*}} void @test_rsq_clamp_f64 +// OGCG: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}}) +void test_rsq_clamp_f64(global double* out, double a) { + *out = __builtin_amdgcn_rsq_clamp(a); +}