Skip to content

Commit c74d00e

Browse files
committed
[CIR][AMDGPU] Add lowering for amdgcn sqrt builtins
1 parent 4916f0e commit c74d00e

File tree

3 files changed: 42 additions, 1 deletion

clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,8 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
175175
case AMDGPU::BI__builtin_amdgcn_sqrtf:
176176
case AMDGPU::BI__builtin_amdgcn_sqrth:
177177
case AMDGPU::BI__builtin_amdgcn_sqrt_bf16: {
178-
llvm_unreachable("sqrt_* NYI");
178+
return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.sqrt")
179+
.getScalarVal();
179180
}
180181
case AMDGPU::BI__builtin_amdgcn_rsq:
181182
case AMDGPU::BI__builtin_amdgcn_rsqf:

clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -305,3 +305,23 @@ __device__ void test_readlane(int* out, int a, int b) {
305305
__device__ void test_readfirstlane(int* out, int a) {
306306
*out = __builtin_amdgcn_readfirstlane(a);
307307
}
308+
309+
// CIR-LABEL: @_Z13test_sqrt_f32Pff
310+
// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
311+
// LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff
312+
// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
313+
// OGCG: define{{.*}} void @_Z13test_sqrt_f32Pff
314+
// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
315+
__device__ void test_sqrt_f32(float* out, float a) {
316+
*out = __builtin_amdgcn_sqrtf(a);
317+
}
318+
319+
// CIR-LABEL: @_Z13test_sqrt_f64Pdd
320+
// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
321+
// LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd
322+
// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
323+
// OGCG: define{{.*}} void @_Z13test_sqrt_f64Pdd
324+
// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
325+
__device__ void test_sqrt_f64(double* out, double a) {
326+
*out = __builtin_amdgcn_sqrt(a);
327+
}

clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,3 +318,23 @@ void test_readlane(global int* out, int a, int b) {
318318
void test_readfirstlane(global int* out, int a) {
319319
*out = __builtin_amdgcn_readfirstlane(a);
320320
}
321+
322+
// CIR-LABEL: @test_sqrt_f32
323+
// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
324+
// LLVM: define{{.*}} void @test_sqrt_f32
325+
// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
326+
// OGCG: define{{.*}} void @test_sqrt_f32
327+
// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
328+
void test_sqrt_f32(global float* out, float a) {
329+
*out = __builtin_amdgcn_sqrtf(a);
330+
}
331+
332+
// CIR-LABEL: @test_sqrt_f64
333+
// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
334+
// LLVM: define{{.*}} void @test_sqrt_f64
335+
// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
336+
// OGCG: define{{.*}} void @test_sqrt_f64
337+
// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
338+
void test_sqrt_f64(global double* out, double a) {
339+
*out = __builtin_amdgcn_sqrt(a);
340+
}

0 commit comments

Comments (0)