Merged
Conversation
Contributor
Author
This stack of pull requests is managed by Graphite. Learn more about stacking. |
Member
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm). Changes: Round out the AMDGPU codegen test to all the generations to cover the illegal f16 targets. Patch is 430.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131844.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 894d717bbbbd5..f51aac4021ae3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2763,7 +2763,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to promote this operator's result!");
- case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::BITCAST:
+ case ISD::FREEZE:
+ R = PromoteFloatRes_BITCAST(N);
+ break;
case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 42d6e57585345..96725e6996e3d 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1,10 +1,90 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,6 +107,80 @@ define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v3i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v3i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,6 +203,74 @@ define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,6 +293,96 @@ define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v5i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v5i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v5i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v5i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v5i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v5i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v5i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,6 +437,96 @@ define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v6i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v6i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v6i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v6i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v6i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX...
[truncated]
|
frederik-h
reviewed
Mar 18, 2025
Contributor
frederik-h
left a comment
There was a problem hiding this comment.
I can confirm that this fixes the issue that I saw with PR #130988.
RKSimon
reviewed
Mar 18, 2025
b8cf2b1 to
c9dd94f
Compare
ded45cf to
7a456ba
Compare
Base automatically changed from
users/arsenm/amdgpu/add-more-freeze-codegen-tests
to
main
March 19, 2025 03:18
Round out the AMDGPU codegen test to all the generations to cover the illegal f16 targets.
c9dd94f to
0b4c8b3
Compare
RKSimon
approved these changes
Mar 19, 2025
Collaborator
RKSimon
left a comment
There was a problem hiding this comment.
SGTM - but I may have missed some of the nuances of freeze of float types
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.

Round out the AMDGPU codegen test to all the generations to cover
the illegal f16 targets.