Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 3987852

Browse files
committed
[X86] Remove and autoupgrade vpmovqd/vpmovwb intrinsics using trunc+select.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351729 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent aac33c8 commit 3987852

13 files changed

+214
-90
lines changed

include/llvm/IR/IntrinsicsX86.td

-16
Original file line numberDiff line numberDiff line change
@@ -4444,10 +4444,6 @@ let TargetPrefix = "x86" in {
44444444
Intrinsic<[],
44454445
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
44464446
[IntrArgMemOnly]>;
4447-
def int_x86_avx512_mask_pmov_qd_256 : // FIXME: Replace with trunc+select.
4448-
Intrinsic<[llvm_v4i32_ty],
4449-
[llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
4450-
[IntrNoMem]>;
44514447
def int_x86_avx512_mask_pmov_qd_mem_256 :
44524448
GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
44534449
Intrinsic<[],
@@ -4473,10 +4469,6 @@ let TargetPrefix = "x86" in {
44734469
Intrinsic<[],
44744470
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
44754471
[IntrArgMemOnly]>;
4476-
def int_x86_avx512_mask_pmov_qd_512 : // FIXME: Replace with trunc+select.
4477-
Intrinsic<[llvm_v8i32_ty],
4478-
[llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
4479-
[IntrNoMem]>;
44804472
def int_x86_avx512_mask_pmov_qd_mem_512 :
44814473
GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
44824474
Intrinsic<[],
@@ -4710,10 +4702,6 @@ let TargetPrefix = "x86" in {
47104702
Intrinsic<[],
47114703
[llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
47124704
[IntrArgMemOnly]>;
4713-
def int_x86_avx512_mask_pmov_wb_256 : // FIXME: Replace with trunc+select.
4714-
Intrinsic<[llvm_v16i8_ty],
4715-
[llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
4716-
[IntrNoMem]>;
47174705
def int_x86_avx512_mask_pmov_wb_mem_256 :
47184706
GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
47194707
Intrinsic<[],
@@ -4739,10 +4727,6 @@ let TargetPrefix = "x86" in {
47394727
Intrinsic<[],
47404728
[llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
47414729
[IntrArgMemOnly]>;
4742-
def int_x86_avx512_mask_pmov_wb_512 : // FIXME: Replace with trunc+select.
4743-
Intrinsic<[llvm_v32i8_ty],
4744-
[llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
4745-
[IntrNoMem]>;
47464730
def int_x86_avx512_mask_pmov_wb_mem_512 :
47474731
GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
47484732
Intrinsic<[],

lib/IR/AutoUpgrade.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
299299
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
300300
Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
301301
Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
302+
Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
303+
Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
304+
Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
305+
Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
302306
Name == "sse.cvtsi2ss" || // Added in 7.0
303307
Name == "sse.cvtsi642ss" || // Added in 7.0
304308
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -2131,6 +2135,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
21312135
if (CI->getNumArgOperands() == 3)
21322136
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
21332137
CI->getArgOperand(1));
2138+
} else if (Name == "avx512.mask.pmov.qd.256" ||
2139+
Name == "avx512.mask.pmov.qd.512" ||
2140+
Name == "avx512.mask.pmov.wb.256" ||
2141+
Name == "avx512.mask.pmov.wb.512") {
2142+
Type *Ty = CI->getArgOperand(1)->getType();
2143+
Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2144+
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2145+
CI->getArgOperand(1));
21342146
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
21352147
Name == "avx2.vbroadcasti128")) {
21362148
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.

lib/Target/X86/X86IntrinsicsInfo.h

-8
Original file line numberDiff line numberDiff line change
@@ -739,10 +739,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
739739
X86ISD::VTRUNC, X86ISD::VMTRUNC),
740740
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
741741
X86ISD::VTRUNC, X86ISD::VMTRUNC),
742-
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
743-
ISD::TRUNCATE, 0),
744-
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
745-
ISD::TRUNCATE, 0),
746742
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
747743
X86ISD::VTRUNC, X86ISD::VMTRUNC),
748744
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
@@ -751,10 +747,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
751747
ISD::TRUNCATE, X86ISD::VMTRUNC),
752748
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
753749
X86ISD::VTRUNC, X86ISD::VMTRUNC),
754-
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
755-
ISD::TRUNCATE, 0),
756-
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
757-
ISD::TRUNCATE, 0),
758750
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
759751
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
760752
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,

test/CodeGen/X86/avx512-intrinsics-upgrade.ll

+31
Original file line numberDiff line numberDiff line change
@@ -9703,3 +9703,34 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
97039703
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
97049704
ret < 4 x float> %res
97059705
}
9706+
9707+
declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
9708+
9709+
define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
9710+
; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
9711+
; X86: ## %bb.0:
9712+
; X86-NEXT: vpmovqd %zmm0, %ymm2 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc2]
9713+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9714+
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9715+
; X86-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
9716+
; X86-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
9717+
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0xfe,0xc0]
9718+
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0xc5,0xed,0xfe,0xc0]
9719+
; X86-NEXT: retl ## encoding: [0xc3]
9720+
;
9721+
; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
9722+
; X64: ## %bb.0:
9723+
; X64-NEXT: vpmovqd %zmm0, %ymm2 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc2]
9724+
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9725+
; X64-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
9726+
; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
9727+
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0xfe,0xc0]
9728+
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0xc5,0xed,0xfe,0xc0]
9729+
; X64-NEXT: retq ## encoding: [0xc3]
9730+
%res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
9731+
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
9732+
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
9733+
%res3 = add <8 x i32> %res0, %res1
9734+
%res4 = add <8 x i32> %res3, %res2
9735+
ret <8 x i32> %res4
9736+
}

test/CodeGen/X86/avx512-intrinsics.ll

+15-13
Original file line numberDiff line numberDiff line change
@@ -2813,24 +2813,26 @@ define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1,
28132813
ret void
28142814
}
28152815

2816-
declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
2817-
28182816
define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
28192817
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
28202818
; CHECK: ## %bb.0:
2819+
; CHECK-NEXT: vpmovqd %zmm0, %ymm2
28212820
; CHECK-NEXT: kmovw %edi, %k1
2822-
; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
28232821
; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
2824-
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
2825-
; CHECK-NEXT: vpmovqd %zmm0, %ymm0
2826-
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
2827-
; CHECK-NEXT: retq
2828-
%res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
2829-
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
2830-
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
2831-
%res3 = add <8 x i32> %res0, %res1
2832-
%res4 = add <8 x i32> %res3, %res2
2833-
ret <8 x i32> %res4
2822+
; CHECK-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
2823+
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
2824+
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
2825+
; CHECK-NEXT: retq
2826+
%1 = trunc <8 x i64> %x0 to <8 x i32>
2827+
%2 = trunc <8 x i64> %x0 to <8 x i32>
2828+
%3 = bitcast i8 %x2 to <8 x i1>
2829+
%4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %x1
2830+
%5 = trunc <8 x i64> %x0 to <8 x i32>
2831+
%6 = bitcast i8 %x2 to <8 x i1>
2832+
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
2833+
%res3 = add <8 x i32> %1, %4
2834+
%res4 = add <8 x i32> %res3, %7
2835+
ret <8 x i32> %res4
28342836
}
28352837

28362838
declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)

test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

+30
Original file line numberDiff line numberDiff line change
@@ -3984,3 +3984,33 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
39843984
%res4 = add <32 x i16> %res3, %res2
39853985
ret <32 x i16> %res4
39863986
}
3987+
3988+
declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
3989+
3990+
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
3991+
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
3992+
; X86: # %bb.0:
3993+
; X86-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
3994+
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3995+
; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
3996+
; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
3997+
; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
3998+
; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
3999+
; X86-NEXT: retl # encoding: [0xc3]
4000+
;
4001+
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4002+
; X64: # %bb.0:
4003+
; X64-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
4004+
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4005+
; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4006+
; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4007+
; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
4008+
; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
4009+
; X64-NEXT: retq # encoding: [0xc3]
4010+
%res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
4011+
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
4012+
%res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
4013+
%res3 = add <32 x i8> %res0, %res1
4014+
%res4 = add <32 x i8> %res3, %res2
4015+
ret <32 x i8> %res4
4016+
}

test/CodeGen/X86/avx512bw-intrinsics.ll

+19-17
Original file line numberDiff line numberDiff line change
@@ -890,34 +890,36 @@ define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x
890890
ret <32 x i16> %res2
891891
}
892892

893-
declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
894-
895893
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
896894
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
897895
; X86: # %bb.0:
896+
; X86-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
898897
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
899898
; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
900-
; X86-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
901-
; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
902-
; X86-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
903-
; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
899+
; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
900+
; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
901+
; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
904902
; X86-NEXT: retl # encoding: [0xc3]
905903
;
906904
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
907905
; X64: # %bb.0:
906+
; X64-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
908907
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
909908
; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
910-
; X64-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
911-
; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
912-
; X64-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
913-
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
914-
; X64-NEXT: retq # encoding: [0xc3]
915-
%res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
916-
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
917-
%res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
918-
%res3 = add <32 x i8> %res0, %res1
919-
%res4 = add <32 x i8> %res3, %res2
920-
ret <32 x i8> %res4
909+
; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
910+
; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
911+
; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
912+
; X64-NEXT: retq # encoding: [0xc3]
913+
%1 = trunc <32 x i16> %x0 to <32 x i8>
914+
%2 = trunc <32 x i16> %x0 to <32 x i8>
915+
%3 = bitcast i32 %x2 to <32 x i1>
916+
%4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> %x1
917+
%5 = trunc <32 x i16> %x0 to <32 x i8>
918+
%6 = bitcast i32 %x2 to <32 x i1>
919+
%7 = select <32 x i1> %6, <32 x i8> %5, <32 x i8> zeroinitializer
920+
%res3 = add <32 x i8> %1, %4
921+
%res4 = add <32 x i8> %res3, %7
922+
ret <32 x i8> %res4
921923
}
922924

923925
declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)

test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll

+32
Original file line numberDiff line numberDiff line change
@@ -8808,3 +8808,35 @@ define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1
88088808
%res4 = add <8 x i16> %res3, %res2
88098809
ret <8 x i16> %res4
88108810
}
8811+
8812+
declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
8813+
8814+
define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
8815+
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
8816+
; X86: # %bb.0:
8817+
; X86-NEXT: vpmovwb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc2]
8818+
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8819+
; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
8820+
; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
8821+
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
8822+
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
8823+
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8824+
; X86-NEXT: retl # encoding: [0xc3]
8825+
;
8826+
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
8827+
; X64: # %bb.0:
8828+
; X64-NEXT: vpmovwb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc2]
8829+
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8830+
; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
8831+
; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
8832+
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
8833+
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
8834+
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8835+
; X64-NEXT: retq # encoding: [0xc3]
8836+
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
8837+
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
8838+
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
8839+
%res3 = add <16 x i8> %res0, %res1
8840+
%res4 = add <16 x i8> %res3, %res2
8841+
ret <16 x i8> %res4
8842+
}

test/CodeGen/X86/avx512bwvl-intrinsics.ll

+18-16
Original file line numberDiff line numberDiff line change
@@ -1611,36 +1611,38 @@ define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1,
16111611
ret void
16121612
}
16131613

1614-
declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
1615-
16161614
define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
16171615
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
16181616
; X86: # %bb.0:
1617+
; X86-NEXT: vpmovwb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc2]
16191618
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
16201619
; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1621-
; X86-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2]
1622-
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
1623-
; X86-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
1624-
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
1620+
; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1621+
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1622+
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
16251623
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
16261624
; X86-NEXT: retl # encoding: [0xc3]
16271625
;
16281626
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
16291627
; X64: # %bb.0:
1628+
; X64-NEXT: vpmovwb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc2]
16301629
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1631-
; X64-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2]
16321630
; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1633-
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
1634-
; X64-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
1635-
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
1631+
; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1632+
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1633+
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
16361634
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
16371635
; X64-NEXT: retq # encoding: [0xc3]
1638-
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1639-
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1640-
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1641-
%res3 = add <16 x i8> %res0, %res1
1642-
%res4 = add <16 x i8> %res3, %res2
1643-
ret <16 x i8> %res4
1636+
%1 = trunc <16 x i16> %x0 to <16 x i8>
1637+
%2 = trunc <16 x i16> %x0 to <16 x i8>
1638+
%3 = bitcast i16 %x2 to <16 x i1>
1639+
%4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> %x1
1640+
%5 = trunc <16 x i16> %x0 to <16 x i8>
1641+
%6 = bitcast i16 %x2 to <16 x i1>
1642+
%7 = select <16 x i1> %6, <16 x i8> %5, <16 x i8> zeroinitializer
1643+
%res3 = add <16 x i8> %1, %4
1644+
%res4 = add <16 x i8> %res3, %7
1645+
ret <16 x i8> %res4
16441646
}
16451647

16461648
declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)

0 commit comments

Comments
 (0)