@@ -19,94 +19,177 @@ int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
1919double *interesting_doubles = get_interesting_doubles();
2020int numInterestingDoubles = sizeof (interesting_doubles_)/sizeof (interesting_doubles_[0 ]);
2121
22- void test_round () {
23- Ret_M128d (__m128d, _mm_ceil_pd);
24- Ret_M128 (__m128, _mm_ceil_ps);
25- Ret_M128d_M128d (__m128d, _mm_ceil_sd);
26- Ret_M128_M128 (__m128, _mm_ceil_ss);
27- Ret_M128d (__m128d, _mm_floor_pd);
28- Ret_M128 (__m128, _mm_floor_ps);
29- Ret_M128d_M128d (__m128d, _mm_floor_sd);
30- Ret_M128_M128 (__m128, _mm_floor_ss);
31- Ret_M128d_Tint (__m128d, _mm_round_pd);
32- Ret_M128_Tint (__m128, _mm_round_ps);
33- Ret_M128d_M128d_Tint (__m128d, _mm_round_sd);
34- Ret_M128_M128_Tint (__m128, _mm_round_ss);
35- }
36-
37- int main () {
38- assert (numInterestingFloats % 4 == 0 );
39- assert (numInterestingInts % 4 == 0 );
40- assert (numInterestingDoubles % 4 == 0 );
41-
42- test_round ();
22+ void NOINLINE test_ceil_pd () { Ret_M128d (__m128d, _mm_ceil_pd); } // One wrapper function per SSE4.1 intrinsic; each body is a single Ret_* harness invocation. Ret_* and NOINLINE are defined outside this view (NOINLINE presumably keeps every test as its own function - TODO confirm).
23+ void NOINLINE test_ceil_ps () { Ret_M128 (__m128, _mm_ceil_ps); }
24+ void NOINLINE test_ceil_sd () { Ret_M128d_M128d (__m128d, _mm_ceil_sd); }
25+ void NOINLINE test_ceil_ss () { Ret_M128_M128 (__m128, _mm_ceil_ss); }
26+ void NOINLINE test_floor_pd () { Ret_M128d (__m128d, _mm_floor_pd); }
27+ void NOINLINE test_floor_ps () { Ret_M128 (__m128, _mm_floor_ps); }
28+ void NOINLINE test_floor_sd () { Ret_M128d_M128d (__m128d, _mm_floor_sd); }
29+ void NOINLINE test_floor_ss () { Ret_M128_M128 (__m128, _mm_floor_ss); }
30+ void NOINLINE test_round_pd () { Ret_M128d_Tint (__m128d, _mm_round_pd); }
31+ void NOINLINE test_round_ps () { Ret_M128_Tint (__m128, _mm_round_ps); }
32+ void NOINLINE test_round_sd () { Ret_M128d_M128d_Tint (__m128d, _mm_round_sd); }
33+ void NOINLINE test_round_ss () { Ret_M128_M128_Tint (__m128, _mm_round_ss); }
34+ void NOINLINE test_blend_epi16 () { Ret_M128i_M128i_Tint (__m128i, _mm_blend_epi16); } // blend / blendv family starts here
35+ void NOINLINE test_blend_pd () { Ret_M128d_M128d_Tint (__m128d, _mm_blend_pd); }
36+ void NOINLINE test_blend_ps () { Ret_M128_M128_Tint (__m128, _mm_blend_ps); }
37+ void NOINLINE test_blendv_epi8 () { Ret_M128i_M128i_M128i (__m128i, _mm_blendv_epi8); }
38+ void NOINLINE test_blendv_pd () { Ret_M128d_M128d_M128d (__m128d, _mm_blendv_pd); }
39+ void NOINLINE test_blendv_ps () { Ret_M128_M128_M128 (__m128, _mm_blendv_ps); }
40+ void NOINLINE test_cvtepi16_epi32 () { Ret_M128i (__m128i, _mm_cvtepi16_epi32); } // cvtepi* / cvtepu* integer conversions start here
41+ void NOINLINE test_cvtepi16_epi64 () { Ret_M128i (__m128i, _mm_cvtepi16_epi64); }
42+ void NOINLINE test_cvtepi32_epi64 () { Ret_M128i (__m128i, _mm_cvtepi32_epi64); }
43+ void NOINLINE test_cvtepi8_epi16 () { Ret_M128i (__m128i, _mm_cvtepi8_epi16); }
44+ void NOINLINE test_cvtepi8_epi32 () { Ret_M128i (__m128i, _mm_cvtepi8_epi32); }
45+ void NOINLINE test_cvtepi8_epi64 () { Ret_M128i (__m128i, _mm_cvtepi8_epi64); }
46+ void NOINLINE test_cvtepu16_epi32 () { Ret_M128i (__m128i, _mm_cvtepu16_epi32); }
47+ void NOINLINE test_cvtepu16_epi64 () { Ret_M128i (__m128i, _mm_cvtepu16_epi64); }
48+ void NOINLINE test_cvtepu32_epi64 () { Ret_M128i (__m128i, _mm_cvtepu32_epi64); }
49+ void NOINLINE test_cvtepu8_epi16 () { Ret_M128i (__m128i, _mm_cvtepu8_epi16); }
50+ void NOINLINE test_cvtepu8_epi32 () { Ret_M128i (__m128i, _mm_cvtepu8_epi32); }
51+ void NOINLINE test_cvtepu8_epi64 () { Ret_M128i (__m128i, _mm_cvtepu8_epi64); }
52+ void NOINLINE test_extract_epi32 () { Ret_M128i_Tint (int , _mm_extract_epi32); } // extract_*: the first harness argument is a scalar type here (int / int64_t / float), not a vector type
53+ void NOINLINE test_extract_epi64 () { Ret_M128i_Tint (int64_t , _mm_extract_epi64); }
54+ void NOINLINE test_extract_epi8 () { Ret_M128i_Tint (int , _mm_extract_epi8); }
55+ void NOINLINE test_extract_ps () { Ret_M128_Tint (float , _mm_extract_ps); } // NOTE(review): Intel documents _mm_extract_ps as returning int (the raw bits of the selected lane) - confirm that 'float' is the intended result type for the harness
56+ void NOINLINE test_insert_epi32 () { Ret_M128i_int_Tint (__m128i, _mm_insert_epi32); }
57+ void NOINLINE test_insert_epi64 () { Ret_M128i_int_Tint (__m128i, _mm_insert_epi64); } // NOTE(review): _mm_insert_epi64 takes a 64-bit scalar; the '_int_' harness presumably supplies 32-bit ints - confirm the upper half is meant to go unexercised
58+ void NOINLINE test_insert_ps () { Ret_M128_M128_Tint (__m128, _mm_insert_ps); }
59+ void NOINLINE test_max_epi32 () { Ret_M128i_M128i (__m128i, _mm_max_epi32); } // integer max / min family starts here
60+ void NOINLINE test_max_epi8 () { Ret_M128i_M128i (__m128i, _mm_max_epi8); }
61+ void NOINLINE test_max_epu16 () { Ret_M128i_M128i (__m128i, _mm_max_epu16); }
62+ void NOINLINE test_max_epu32 () { Ret_M128i_M128i (__m128i, _mm_max_epu32); }
63+ void NOINLINE test_min_epi32 () { Ret_M128i_M128i (__m128i, _mm_min_epi32); }
64+ void NOINLINE test_min_epi8 () { Ret_M128i_M128i (__m128i, _mm_min_epi8); }
65+ void NOINLINE test_min_epu16 () { Ret_M128i_M128i (__m128i, _mm_min_epu16); }
66+ void NOINLINE test_min_epu32 () { Ret_M128i_M128i (__m128i, _mm_min_epu32); }
67+ void NOINLINE test_test_cmpeq_epi64 () { Ret_M128i_M128i (__m128i, _mm_cmpeq_epi64); } // NOTE(review): doubled "test_" prefix although the intrinsic is _mm_cmpeq_epi64, not a _mm_test* one; the other wrappers in this file use test_<intrinsic suffix>. main() calls these names as spelled, so any rename must update both places.
68+ void NOINLINE test_test_minpos_epu16 () { Ret_M128i (__m128i, _mm_minpos_epu16); } // NOTE(review): same doubled prefix (intrinsic is _mm_minpos_epu16)
69+ void NOINLINE test_test_mpsadbw_epu8 () { Ret_M128i_M128i_Tint (__m128i, _mm_mpsadbw_epu8); } // NOTE(review): same doubled prefix (intrinsic is _mm_mpsadbw_epu8)
70+ void NOINLINE test_testmul_epi32 () { Ret_M128i_M128i (__m128i, _mm_mul_epi32); } // NOTE(review): "testmul" looks like a typo for test_mul_epi32 (intrinsic is _mm_mul_epi32)
71+ void NOINLINE test_test_mullo_epi32 () { Ret_M128i_M128i (__m128i, _mm_mullo_epi32); } // NOTE(review): same doubled prefix (intrinsic is _mm_mullo_epi32)
72+ void NOINLINE test_test_packus_epi32 () { Ret_M128i_M128i (__m128i, _mm_packus_epi32); } // NOTE(review): same doubled prefix (intrinsic is _mm_packus_epi32)
73+ void NOINLINE test_test_stream_load_si128 () { Ret_IntPtr (__m128i, _mm_stream_load_si128, __m128i*, 4 , 4 ); } // NOTE(review): same doubled prefix; the meaning of the trailing "4 , 4" arguments is set by Ret_IntPtr, which is defined outside this view
4374
44- Ret_M128i_M128i_Tint (__m128i, _mm_blend_epi16);
45- Ret_M128d_M128d_Tint (__m128d, _mm_blend_pd);
46- Ret_M128_M128_Tint (__m128, _mm_blend_ps);
47- Ret_M128i_M128i_M128i (__m128i, _mm_blendv_epi8);
48- Ret_M128d_M128d_M128d (__m128d, _mm_blendv_pd);
49- Ret_M128_M128_M128 (__m128, _mm_blendv_ps);
50- Ret_M128i_M128i (__m128i, _mm_cmpeq_epi64);
51- Ret_M128i (__m128i, _mm_cvtepi16_epi32);
52- Ret_M128i (__m128i, _mm_cvtepi16_epi64);
53- Ret_M128i (__m128i, _mm_cvtepi32_epi64);
54- Ret_M128i (__m128i, _mm_cvtepi8_epi16);
55- Ret_M128i (__m128i, _mm_cvtepi8_epi32);
56- Ret_M128i (__m128i, _mm_cvtepi8_epi64);
57- Ret_M128i (__m128i, _mm_cvtepu16_epi32);
58- Ret_M128i (__m128i, _mm_cvtepu16_epi64);
59- Ret_M128i (__m128i, _mm_cvtepu32_epi64);
60- Ret_M128i (__m128i, _mm_cvtepu8_epi16);
61- Ret_M128i (__m128i, _mm_cvtepu8_epi32);
62- Ret_M128i (__m128i, _mm_cvtepu8_epi64);
75+ void NOINLINE test_dp_pd () { // Runs the _mm_dp_pd harness check with the testNaNBits flag temporarily cleared.
76+ bool oldTestNaNBits = testNaNBits; // remember the current setting so it can be put back afterwards
6377 testNaNBits = false ;
6478 Ret_M128d_M128d_Tint (__m128d, _mm_dp_pd);
79+ testNaNBits = oldTestNaNBits; // restore whatever value was in effect on entry
80+ }
81+ void NOINLINE test_dp_ps () { // Runs the _mm_dp_ps harness check with the testNaNBits flag temporarily cleared.
82+ bool oldTestNaNBits = testNaNBits; // remember the current setting so it can be put back afterwards
83+ testNaNBits = false ; // NaN-bit comparison is switched off for this intrinsic; the reason is given on the Ret_ line below
6584 Ret_M128_M128_Tint (__m128, _mm_dp_ps); // _mm_dp_ps emulation does not match NaN bit selection rules (seems to be unspecified)
66- testNaNBits = true ;
67- Ret_M128i_Tint (int , _mm_extract_epi32);
68- Ret_M128i_Tint (int64_t , _mm_extract_epi64);
69- Ret_M128i_Tint (int , _mm_extract_epi8);
70- Ret_M128_Tint (float , _mm_extract_ps);
71- Ret_M128i_int_Tint (__m128i, _mm_insert_epi32);
72- Ret_M128i_int_Tint (__m128i, _mm_insert_epi64);
73- Ret_M128_M128_Tint (__m128, _mm_insert_ps);
74- Ret_M128i_M128i (__m128i, _mm_max_epi32);
75- Ret_M128i_M128i (__m128i, _mm_max_epi8);
76- Ret_M128i_M128i (__m128i, _mm_max_epu16);
77- Ret_M128i_M128i (__m128i, _mm_max_epu32);
78- Ret_M128i_M128i (__m128i, _mm_min_epi32);
79- Ret_M128i_M128i (__m128i, _mm_min_epi8);
80- Ret_M128i_M128i (__m128i, _mm_min_epu16);
81- Ret_M128i_M128i (__m128i, _mm_min_epu32);
82- Ret_M128i (__m128i, _mm_minpos_epu16);
83- Ret_M128i_M128i_Tint (__m128i, _mm_mpsadbw_epu8);
84- Ret_M128i_M128i (__m128i, _mm_mul_epi32);
85- Ret_M128i_M128i (__m128i, _mm_mullo_epi32);
86- Ret_M128i_M128i (__m128i, _mm_packus_epi32);
87- Ret_IntPtr (__m128i, _mm_stream_load_si128, __m128i*, 4 , 4 );
85+ testNaNBits = oldTestNaNBits; // restore the saved value instead of unconditionally forcing the flag to true
86+ }
87+
88+ void NOINLINE test_test_all_ones () { // Runs the Ret_M128i harness on _mm_test_all_ones (int result), then prints its result for three fixed vectors: all bits set, the lowest bit of each 64-bit lane cleared, and all zero.
8889 Ret_M128i (int , _mm_test_all_ones);
8990 printf (" _mm_test_all_ones(0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
9091 printf (" _mm_test_all_ones(0xFFFFFFFFFFFFFFFEull): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull )));
9192 printf (" _mm_test_all_ones(0): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0 )));
93+ }
94+
95+ void NOINLINE test_test_all_zeros () { // Runs the Ret_M128i_M128i harness on _mm_test_all_zeros (int result), then prints its result for three fixed first operands (all ones, lowest bit of each 64-bit lane cleared, zero), each paired with an all-ones second operand.
9296 Ret_M128i_M128i (int , _mm_test_all_zeros);
9397 printf (" _mm_test_all_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
9498 printf (" _mm_test_all_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
9599 printf (" _mm_test_all_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
100+ }
101+
102+ void NOINLINE test_test_mix_ones_zeros () { // Runs the Ret_M128i_M128i harness on _mm_test_mix_ones_zeros (int result), then prints its result for three fixed first operands (all ones, lowest bit of each 64-bit lane cleared, zero), each paired with an all-ones second operand.
96103 Ret_M128i_M128i (int , _mm_test_mix_ones_zeros);
97104 printf (" _mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
98105 printf (" _mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
99106 printf (" _mm_test_mix_ones_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
107+ }
108+
109+ void NOINLINE test_testc () { // Runs the Ret_M128i_M128i harness on _mm_testc_si128 (int result), then prints its result for three fixed first operands (all ones, lowest bit of each 64-bit lane cleared, zero), each paired with an all-ones second operand.
100110 Ret_M128i_M128i (int , _mm_testc_si128);
101111 printf (" _mm_testc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
102112 printf (" _mm_testc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
103113 printf (" _mm_testc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
114+ }
115+
116+ void NOINLINE test_testnzc () { // Runs the Ret_M128i_M128i harness on _mm_testnzc_si128 (int result), then prints its result for three fixed first operands (all ones, lowest bit of each 64-bit lane cleared, zero), each paired with an all-ones second operand.
104117 Ret_M128i_M128i (int , _mm_testnzc_si128);
105118 printf (" _mm_testnzc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
106119 printf (" _mm_testnzc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
107120 printf (" _mm_testnzc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
121+ }
122+
123+ void NOINLINE test_testz () { // Runs the Ret_M128i_M128i harness on _mm_testz_si128 (int result), then prints its result for three fixed first operands (all ones, lowest bit of each 64-bit lane cleared, zero), each paired with an all-ones second operand.
108124 Ret_M128i_M128i (int , _mm_testz_si128);
109125 printf (" _mm_testz_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
110126 printf (" _mm_testz_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
111127 printf (" _mm_testz_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
112128}
129+
130+ int main () { // Entry point: checks the sizes of the input tables, then calls every test wrapper exactly once in the order listed below.
131+ assert (numInterestingFloats % 4 == 0 ); // each input count must be a multiple of 4; presumably the Ret_* harness consumes inputs in groups of four - TODO confirm against its definition
132+ assert (numInterestingInts % 4 == 0 );
133+ assert (numInterestingDoubles % 4 == 0 );
134+
135+ test_ceil_pd (); // rounding: ceil / floor / round
136+ test_ceil_ps ();
137+ test_ceil_sd ();
138+ test_ceil_ss ();
139+ test_floor_pd ();
140+ test_floor_ps ();
141+ test_floor_sd ();
142+ test_floor_ss ();
143+ test_round_pd ();
144+ test_round_ps ();
145+ test_round_sd ();
146+ test_round_ss ();
147+ test_blend_epi16 (); // blend / blendv
148+ test_blend_pd ();
149+ test_blend_ps ();
150+ test_blendv_epi8 ();
151+ test_blendv_pd ();
152+ test_blendv_ps ();
153+ test_cvtepi16_epi32 (); // integer conversions
154+ test_cvtepi16_epi64 ();
155+ test_cvtepi32_epi64 ();
156+ test_cvtepi8_epi16 ();
157+ test_cvtepi8_epi32 ();
158+ test_cvtepi8_epi64 ();
159+ test_cvtepu16_epi32 ();
160+ test_cvtepu16_epi64 ();
161+ test_cvtepu32_epi64 ();
162+ test_cvtepu8_epi16 ();
163+ test_cvtepu8_epi32 ();
164+ test_cvtepu8_epi64 ();
165+ test_extract_epi32 (); // extract / insert
166+ test_extract_epi64 ();
167+ test_extract_epi8 ();
168+ test_extract_ps ();
169+ test_insert_epi32 ();
170+ test_insert_epi64 ();
171+ test_insert_ps ();
172+ test_max_epi32 (); // integer max / min
173+ test_max_epi8 ();
174+ test_max_epu16 ();
175+ test_max_epu32 ();
176+ test_min_epi32 ();
177+ test_min_epi8 ();
178+ test_min_epu16 ();
179+ test_min_epu32 ();
180+ test_test_cmpeq_epi64 (); // remaining integer ops (compare, minpos, mpsadbw, multiply, pack, stream load)
181+ test_test_minpos_epu16 ();
182+ test_test_mpsadbw_epu8 ();
183+ test_testmul_epi32 ();
184+ test_test_mullo_epi32 ();
185+ test_test_packus_epi32 ();
186+ test_test_stream_load_si128 ();
187+ test_dp_pd (); // the two dot-product tests clear and then restore testNaNBits internally
188+ test_dp_ps ();
189+ test_test_all_ones (); // _mm_test* / _mm_test*_si128 predicates; these wrappers also print results for fixed inputs
190+ test_test_all_zeros ();
191+ test_test_mix_ones_zeros ();
192+ test_testc ();
193+ test_testnzc ();
194+ test_testz ();
195+ }
0 commit comments