[CIR][AMDGPU] Add lowering for amdgcn sqrt builtins

skc7 · skc7 · commit c74d00eb9a38 · 2025-12-09T12:14:19.000+05:30
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -175,7 +175,8 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_sqrtf:
   case AMDGPU::BI__builtin_amdgcn_sqrth:
   case AMDGPU::BI__builtin_amdgcn_sqrt_bf16: {
-    llvm_unreachable("sqrt_* NYI");
+    return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.sqrt")
+        .getScalarVal();
   }
   case AMDGPU::BI__builtin_amdgcn_rsq:
   case AMDGPU::BI__builtin_amdgcn_rsqf:
diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip
@@ -305,3 +305,23 @@ __device__ void test_readlane(int* out, int a, int b) {
 __device__ void test_readfirstlane(int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @_Z13test_sqrt_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_sqrt_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+__device__ void test_sqrt_f32(float* out, float a) { // f32 case: float argument, result stored through out
+  *out = __builtin_amdgcn_sqrtf(a); // the directives above expect this to become a call to llvm.amdgcn.sqrt.f32
+}
+
+// CIR-LABEL: @_Z13test_sqrt_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_sqrt_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+__device__ void test_sqrt_f64(double* out, double a) { // f64 case: double argument, result stored through out
+  *out = __builtin_amdgcn_sqrt(a); // the directives above expect this to become a call to llvm.amdgcn.sqrt.f64
+}
diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl
@@ -318,3 +318,23 @@ void test_readlane(global int* out, int a, int b) {
 void test_readfirstlane(global int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @test_sqrt_f32
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_sqrt_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+void test_sqrt_f32(global float* out, float a) { // f32 case: float argument, result stored through the global pointer out
+  *out = __builtin_amdgcn_sqrtf(a); // the directives above expect this to become a call to llvm.amdgcn.sqrt.f32
+}
+
+// CIR-LABEL: @test_sqrt_f64
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_sqrt_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+void test_sqrt_f64(global double* out, double a) { // f64 case: double argument, result stored through the global pointer out
+  *out = __builtin_amdgcn_sqrt(a); // the directives above expect this to become a call to llvm.amdgcn.sqrt.f64
+}