Skip to content

Commit bff1370

Browse files
authored
Merge pull request #11721 from fhahn/fix-ext-vector-padding-poison
[Clang] Freeze padded vectors before storing. (llvm#164821) rdar://162655662
2 parents 4d15cb9 + 1c73391 commit bff1370

File tree

4 files changed

+79
-28
lines changed

4 files changed

+79
-28
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2716,9 +2716,13 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
27162716
CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts());
27172717
if (!ClangVecTy->isPackedVectorBoolType(getContext()) &&
27182718
VecTy != NewVecTy) {
2719-
SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
2719+
SmallVector<int, 16> Mask(NewVecTy->getNumElements(),
2720+
VecTy->getNumElements());
27202721
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
2721-
Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
2722+
// Use undef instead of poison for the padding lanes, to make sure no
2723+
// padding bits are poisoned, which may break coercion.
2724+
Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
2725+
Mask, "extractVec");
27222726
SrcTy = NewVecTy;
27232727
}
27242728
if (Addr.getElementType() != SrcTy)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
2+
// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
3+
4+
typedef float float3 __attribute__((ext_vector_type(3)));
5+
struct Vec3 {
6+
union {
7+
struct {
8+
float x;
9+
float y;
10+
float z;
11+
};
12+
float vec __attribute__((ext_vector_type(3)));
13+
};
14+
};
15+
16+
// CHECK-LABEL: define i128 @add(
17+
// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
18+
// CHECK-NEXT: [[ENTRY:.*:]]
19+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16
20+
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16
21+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
22+
// CHECK-NEXT: store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
23+
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
24+
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
25+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
26+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
27+
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
28+
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
29+
// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
30+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
31+
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
32+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
33+
// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
34+
// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
35+
// CHECK-NEXT: ret i128 [[TMP3]]
36+
//
37+
struct Vec3 add(struct Vec3 a) {
38+
struct Vec3 res;
39+
res.vec = a.vec + a.vec;
40+
return res;
41+
}
42+

clang/test/CodeGenCXX/matrix-vector-bit-int.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
1919
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
2020
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
2121
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
22-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
22+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2323
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
2424
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
2525
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) {
3838
// CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
3939
// CHECK-NEXT: [[ENTRY:.*:]]
4040
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
41-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
41+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4242
// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
4343
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
4444
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) {
5757
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
5858
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
5959
// CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
60-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
60+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6161
// CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
6262
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
6363
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) {
8080
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
8181
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
8282
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
83-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
83+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8484
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
8585
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
8686
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>

clang/test/CodeGenOpenCL/preserve_vec3.cl

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ typedef float float4 __attribute__((ext_vector_type(4)));
1111
// CHECK-LABEL: define dso_local spir_kernel void @foo(
1212
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
1313
// CHECK-NEXT: [[ENTRY:.*:]]
14-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15-
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
16-
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
14+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
15+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
16+
// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[EXTRACTVEC_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
1718
// CHECK-NEXT: ret void
1819
//
1920
void kernel foo(global float3 *a, global float3 *b) {
@@ -24,8 +25,9 @@ void kernel foo(global float3 *a, global float3 *b) {
2425
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
2526
// CHECK-NEXT: [[ENTRY:.*:]]
2627
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
27-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
28-
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
28+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
29+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[ASTYPE_I]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
30+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
2931
// CHECK-NEXT: ret void
3032
//
3133
void kernel float4_to_float3(global float3 *a, global float4 *b) {
@@ -35,9 +37,9 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
3537
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
3638
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
3739
// CHECK-NEXT: [[ENTRY:.*:]]
38-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
39-
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
40-
// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
40+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
41+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
42+
// CHECK-NEXT: store <4 x float> [[ASTYPE_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
4143
// CHECK-NEXT: ret void
4244
//
4345
void kernel float3_to_float4(global float3 *a, global float4 *b) {
@@ -47,8 +49,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
4749
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
4850
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
4951
// CHECK-NEXT: [[ENTRY:.*:]]
50-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
51-
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
52+
// CHECK-NEXT: [[LOADVECN_I:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
53+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5254
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
5355
// CHECK-NEXT: ret void
5456
//
@@ -60,41 +62,43 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
6062
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
6163
// CHECK-NEXT: [[ENTRY:.*:]]
6264
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
63-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
64-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
65+
// CHECK-NEXT: [[ASTYPE_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
66+
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[ASTYPE_I]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
67+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
6568
// CHECK-NEXT: ret void
6669
//
6770
void kernel char8_to_short3(global short3 *a, global char8 *b) {
6871
*a = __builtin_astype(*b, short3);
6972
}
7073

7174
// CHECK-LABEL: define dso_local spir_func void @from_char3(
72-
// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
75+
// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
7376
// CHECK-NEXT: [[ENTRY:.*:]]
74-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
75-
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
77+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
78+
// CHECK-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA17:![0-9]+]]
7679
// CHECK-NEXT: ret void
7780
//
7881
void from_char3(char3 a, global int *out) {
7982
*out = __builtin_astype(a, int);
8083
}
8184

8285
// CHECK-LABEL: define dso_local spir_func void @from_short3(
83-
// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
86+
// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
8487
// CHECK-NEXT: [[ENTRY:.*:]]
85-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
86-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
88+
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
89+
// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA19:![0-9]+]]
8790
// CHECK-NEXT: ret void
8891
//
8992
void from_short3(short3 a, global long *out) {
9093
*out = __builtin_astype(a, long);
9194
}
9295

9396
// CHECK-LABEL: define dso_local spir_func void @scalar_to_char3(
94-
// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
97+
// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
9598
// CHECK-NEXT: [[ENTRY:.*:]]
9699
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
97-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
100+
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
101+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> <i8 undef, i8 poison, i8 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
98102
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[TBAA8]]
99103
// CHECK-NEXT: ret void
100104
//
@@ -103,10 +107,11 @@ void scalar_to_char3(int a, global char3 *out) {
103107
}
104108

105109
// CHECK-LABEL: define dso_local spir_func void @scalar_to_short3(
106-
// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
110+
// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
107111
// CHECK-NEXT: [[ENTRY:.*:]]
108112
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
109-
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
113+
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
114+
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
110115
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
111116
// CHECK-NEXT: ret void
112117
//

0 commit comments

Comments
 (0)