@@ -6398,10 +6398,8 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
6398
6398
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
6399
6399
; GFX8-NEXT: s_waitcnt vmcnt(0)
6400
6400
; GFX8-NEXT: v_mov_b32_e32 v5, v0
6401
- ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
6402
- ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6401
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
6403
6402
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
6404
- ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6405
6403
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
6406
6404
; GFX8-NEXT: v_mov_b32_e32 v0, v4
6407
6405
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -6627,10 +6625,8 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin
6627
6625
; GFX8-NEXT: .LBB20_1: ; %atomicrmw.start
6628
6626
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
6629
6627
; GFX8-NEXT: s_waitcnt vmcnt(0)
6630
- ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
6631
- ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6628
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
6632
6629
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
6633
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6634
6630
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
6635
6631
; GFX8-NEXT: v_mov_b32_e32 v5, v2
6636
6632
; GFX8-NEXT: v_mov_b32_e32 v4, v1
@@ -7048,9 +7044,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall
7048
7044
; GFX8-NEXT: ; =>This Loop Header: Depth=1
7049
7045
; GFX8-NEXT: ; Child Loop BB21_4 Depth 2
7050
7046
; GFX8-NEXT: s_waitcnt vmcnt(0)
7051
- ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v8
7052
- ; GFX8-NEXT: v_add_f16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7053
- ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
7047
+ ; GFX8-NEXT: v_add_f16_sdwa v4, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7054
7048
; GFX8-NEXT: v_add_f16_e32 v6, v8, v5
7055
7049
; GFX8-NEXT: v_or_b32_e32 v7, v6, v4
7056
7050
; GFX8-NEXT: v_mov_b32_e32 v6, v7
@@ -7396,10 +7390,8 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp
7396
7390
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7397
7391
; GFX8-NEXT: s_waitcnt vmcnt(0)
7398
7392
; GFX8-NEXT: v_mov_b32_e32 v5, v0
7399
- ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
7400
- ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7393
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7401
7394
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
7402
- ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
7403
7395
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
7404
7396
; GFX8-NEXT: v_mov_b32_e32 v0, v4
7405
7397
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -7658,10 +7650,8 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace(
7658
7650
; GFX8-NEXT: .LBB23_1: ; %atomicrmw.start
7659
7651
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7660
7652
; GFX8-NEXT: s_waitcnt vmcnt(0)
7661
- ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7662
- ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7653
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7663
7654
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
7664
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
7665
7655
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
7666
7656
; GFX8-NEXT: v_mov_b32_e32 v5, v2
7667
7657
; GFX8-NEXT: v_mov_b32_e32 v4, v1
@@ -7925,10 +7915,8 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no
7925
7915
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
7926
7916
; GFX8-NEXT: s_waitcnt vmcnt(0)
7927
7917
; GFX8-NEXT: v_mov_b32_e32 v5, v0
7928
- ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5
7929
- ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7918
+ ; GFX8-NEXT: v_add_f16_sdwa v0, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
7930
7919
; GFX8-NEXT: v_add_f16_e32 v1, v5, v2
7931
- ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
7932
7920
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
7933
7921
; GFX8-NEXT: v_mov_b32_e32 v0, v4
7934
7922
; GFX8-NEXT: v_mov_b32_e32 v1, v5
@@ -8187,10 +8175,8 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem
8187
8175
; GFX8-NEXT: .LBB25_1: ; %atomicrmw.start
8188
8176
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
8189
8177
; GFX8-NEXT: s_waitcnt vmcnt(0)
8190
- ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v2
8191
- ; GFX8-NEXT: v_add_f16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
8178
+ ; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
8192
8179
; GFX8-NEXT: v_add_f16_e32 v4, v2, v0
8193
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8194
8180
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
8195
8181
; GFX8-NEXT: v_mov_b32_e32 v5, v2
8196
8182
; GFX8-NEXT: v_mov_b32_e32 v4, v1
0 commit comments