Skip to content

Commit 55c1674

Browse files
committed
Display core time and wallclock time when the test suite finishes, to make it easy to observe and benchmark test suite speedup optimizations.
1 parent 9e57126 commit 55c1674

3 files changed

Lines changed: 161 additions & 68 deletions

File tree

test/parallel_testsuite.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def combine_results(self, result, buffered_results):
9090
# Sort the results back into alphabetical order. Running the tests in
9191
# parallel causes mis-orderings, this makes the results more readable.
9292
results = sorted(buffered_results, key=lambda res: str(res.test))
93+
result.core_time = 0
9394
for r in results:
9495
r.updateResult(result)
9596
return result
@@ -102,18 +103,21 @@ class BufferedParallelTestResult:
102103
"""
103104
def __init__(self):
104105
self.buffered_result = None
106+
self.test_duration = 0
105107

106108
@property
107109
def test(self):
108110
return self.buffered_result.test
109111

110-
def addDuration(self, test, elapsed):
111-
pass
112+
def calculateDuration(self):
113+
self.test_duration = time.perf_counter() - self.start_time
114+
return self.test_duration
112115

113116
def updateResult(self, result):
114117
result.startTest(self.test)
115118
self.buffered_result.updateResult(result)
116119
result.stopTest(self.test)
120+
result.core_time += self.test_duration
117121

118122
def startTest(self, test):
119123
self.start_time = time.perf_counter()
@@ -122,21 +126,21 @@ def stopTest(self, test):
122126
# TODO(sbc): figure out a way to display this duration information again when
123127
# these results get passed back to the TextTestRunner/TextTestResult.
124128
if hasattr(time, 'perf_counter'):
125-
self.buffered_result.duration = time.perf_counter() - self.start_time
129+
self.buffered_result.duration = self.test_duration
126130

127131
def addSuccess(self, test):
128132
if hasattr(time, 'perf_counter'):
129-
print(test, '... ok (%.2fs)' % (time.perf_counter() - self.start_time), file=sys.stderr)
133+
print(test, '... ok (%.2fs)' % (self.calculateDuration()), file=sys.stderr)
130134
self.buffered_result = BufferedTestSuccess(test)
131135

132136
def addExpectedFailure(self, test, err):
133137
if hasattr(time, 'perf_counter'):
134-
print(test, '... expected failure (%.2fs)' % (time.perf_counter() - self.start_time), file=sys.stderr)
138+
print(test, '... expected failure (%.2fs)' % (self.calculateDuration()), file=sys.stderr)
135139
self.buffered_result = BufferedTestExpectedFailure(test, err)
136140

137141
def addUnexpectedSuccess(self, test):
138142
if hasattr(time, 'perf_counter'):
139-
print(test, '... unexpected success (%.2fs)' % (time.perf_counter() - self.start_time), file=sys.stderr)
143+
print(test, '... unexpected success (%.2fs)' % (self.calculateDuration()), file=sys.stderr)
140144
self.buffered_result = BufferedTestUnexpectedSuccess(test)
141145

142146
def addSkip(self, test, reason):

test/runner.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import random
3030
import sys
3131
import unittest
32+
import time
3233

3334
# Setup
3435

@@ -348,13 +349,18 @@ def run_tests(options, suites):
348349
else:
349350
testRunner = unittest.TextTestRunner(verbosity=2, failfast=options.failfast)
350351

352+
total_core_time = 0
353+
run_start_time = time.perf_counter()
351354
for mod_name, suite in suites:
352355
print('Running %s: (%s tests)' % (mod_name, suite.countTestCases()))
353356
res = testRunner.run(suite)
354357
msg = ('%s: %s run, %s errors, %s failures, %s skipped' %
355358
(mod_name, res.testsRun, len(res.errors), len(res.failures), len(res.skipped)))
356359
num_failures += len(res.errors) + len(res.failures) + len(res.unexpectedSuccesses)
357360
resultMessages.append(msg)
361+
total_core_time += res.core_time
362+
total_run_time = time.perf_counter() - run_start_time
363+
print('Total core time: %.3fs. Wallclock time: %.3fs. Parallelization: %.2fx.' % (total_core_time, total_run_time, total_core_time / total_run_time))
358364

359365
if len(resultMessages) > 1:
360366
print('====================')

test/sse/test_sse4_1.cpp

Lines changed: 145 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -19,94 +19,177 @@ int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
1919
double *interesting_doubles = get_interesting_doubles();
2020
int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
2121

22-
void test_round() {
23-
Ret_M128d(__m128d, _mm_ceil_pd);
24-
Ret_M128(__m128, _mm_ceil_ps);
25-
Ret_M128d_M128d(__m128d, _mm_ceil_sd);
26-
Ret_M128_M128(__m128, _mm_ceil_ss);
27-
Ret_M128d(__m128d, _mm_floor_pd);
28-
Ret_M128(__m128, _mm_floor_ps);
29-
Ret_M128d_M128d(__m128d, _mm_floor_sd);
30-
Ret_M128_M128(__m128, _mm_floor_ss);
31-
Ret_M128d_Tint(__m128d, _mm_round_pd);
32-
Ret_M128_Tint(__m128, _mm_round_ps);
33-
Ret_M128d_M128d_Tint(__m128d, _mm_round_sd);
34-
Ret_M128_M128_Tint(__m128, _mm_round_ss);
35-
}
36-
37-
int main() {
38-
assert(numInterestingFloats % 4 == 0);
39-
assert(numInterestingInts % 4 == 0);
40-
assert(numInterestingDoubles % 4 == 0);
41-
42-
test_round();
22+
void NOINLINE test_ceil_pd() { Ret_M128d(__m128d, _mm_ceil_pd); }
23+
void NOINLINE test_ceil_ps() { Ret_M128(__m128, _mm_ceil_ps); }
24+
void NOINLINE test_ceil_sd() { Ret_M128d_M128d(__m128d, _mm_ceil_sd); }
25+
void NOINLINE test_ceil_ss() { Ret_M128_M128(__m128, _mm_ceil_ss); }
26+
void NOINLINE test_floor_pd() { Ret_M128d(__m128d, _mm_floor_pd); }
27+
void NOINLINE test_floor_ps() { Ret_M128(__m128, _mm_floor_ps); }
28+
void NOINLINE test_floor_sd() { Ret_M128d_M128d(__m128d, _mm_floor_sd); }
29+
void NOINLINE test_floor_ss() { Ret_M128_M128(__m128, _mm_floor_ss); }
30+
void NOINLINE test_round_pd() { Ret_M128d_Tint(__m128d, _mm_round_pd); }
31+
void NOINLINE test_round_ps() { Ret_M128_Tint(__m128, _mm_round_ps); }
32+
void NOINLINE test_round_sd() { Ret_M128d_M128d_Tint(__m128d, _mm_round_sd); }
33+
void NOINLINE test_round_ss() { Ret_M128_M128_Tint(__m128, _mm_round_ss); }
34+
void NOINLINE test_blend_epi16() { Ret_M128i_M128i_Tint(__m128i, _mm_blend_epi16); }
35+
void NOINLINE test_blend_pd() { Ret_M128d_M128d_Tint(__m128d, _mm_blend_pd); }
36+
void NOINLINE test_blend_ps() { Ret_M128_M128_Tint(__m128, _mm_blend_ps); }
37+
void NOINLINE test_blendv_epi8() { Ret_M128i_M128i_M128i(__m128i, _mm_blendv_epi8); }
38+
void NOINLINE test_blendv_pd() { Ret_M128d_M128d_M128d(__m128d, _mm_blendv_pd); }
39+
void NOINLINE test_blendv_ps() { Ret_M128_M128_M128(__m128, _mm_blendv_ps); }
40+
void NOINLINE test_cvtepi16_epi32() { Ret_M128i(__m128i, _mm_cvtepi16_epi32); }
41+
void NOINLINE test_cvtepi16_epi64() { Ret_M128i(__m128i, _mm_cvtepi16_epi64); }
42+
void NOINLINE test_cvtepi32_epi64() { Ret_M128i(__m128i, _mm_cvtepi32_epi64); }
43+
void NOINLINE test_cvtepi8_epi16() { Ret_M128i(__m128i, _mm_cvtepi8_epi16); }
44+
void NOINLINE test_cvtepi8_epi32() { Ret_M128i(__m128i, _mm_cvtepi8_epi32); }
45+
void NOINLINE test_cvtepi8_epi64() { Ret_M128i(__m128i, _mm_cvtepi8_epi64); }
46+
void NOINLINE test_cvtepu16_epi32() { Ret_M128i(__m128i, _mm_cvtepu16_epi32); }
47+
void NOINLINE test_cvtepu16_epi64() { Ret_M128i(__m128i, _mm_cvtepu16_epi64); }
48+
void NOINLINE test_cvtepu32_epi64() { Ret_M128i(__m128i, _mm_cvtepu32_epi64); }
49+
void NOINLINE test_cvtepu8_epi16() { Ret_M128i(__m128i, _mm_cvtepu8_epi16); }
50+
void NOINLINE test_cvtepu8_epi32() { Ret_M128i(__m128i, _mm_cvtepu8_epi32); }
51+
void NOINLINE test_cvtepu8_epi64() { Ret_M128i(__m128i, _mm_cvtepu8_epi64); }
52+
void NOINLINE test_extract_epi32() { Ret_M128i_Tint(int, _mm_extract_epi32); }
53+
void NOINLINE test_extract_epi64() { Ret_M128i_Tint(int64_t, _mm_extract_epi64); }
54+
void NOINLINE test_extract_epi8() { Ret_M128i_Tint(int, _mm_extract_epi8); }
55+
void NOINLINE test_extract_ps() { Ret_M128_Tint(float, _mm_extract_ps); }
56+
void NOINLINE test_insert_epi32() { Ret_M128i_int_Tint(__m128i, _mm_insert_epi32); }
57+
void NOINLINE test_insert_epi64() { Ret_M128i_int_Tint(__m128i, _mm_insert_epi64); }
58+
void NOINLINE test_insert_ps() { Ret_M128_M128_Tint(__m128, _mm_insert_ps); }
59+
void NOINLINE test_max_epi32() { Ret_M128i_M128i(__m128i, _mm_max_epi32); }
60+
void NOINLINE test_max_epi8() { Ret_M128i_M128i(__m128i, _mm_max_epi8); }
61+
void NOINLINE test_max_epu16() { Ret_M128i_M128i(__m128i, _mm_max_epu16); }
62+
void NOINLINE test_max_epu32() { Ret_M128i_M128i(__m128i, _mm_max_epu32); }
63+
void NOINLINE test_min_epi32() { Ret_M128i_M128i(__m128i, _mm_min_epi32); }
64+
void NOINLINE test_min_epi8() { Ret_M128i_M128i(__m128i, _mm_min_epi8); }
65+
void NOINLINE test_min_epu16() { Ret_M128i_M128i(__m128i, _mm_min_epu16); }
66+
void NOINLINE test_min_epu32() { Ret_M128i_M128i(__m128i, _mm_min_epu32); }
67+
void NOINLINE test_test_cmpeq_epi64() { Ret_M128i_M128i(__m128i, _mm_cmpeq_epi64); }
68+
void NOINLINE test_test_minpos_epu16() { Ret_M128i(__m128i, _mm_minpos_epu16); }
69+
void NOINLINE test_test_mpsadbw_epu8() { Ret_M128i_M128i_Tint(__m128i, _mm_mpsadbw_epu8); }
70+
void NOINLINE test_testmul_epi32() { Ret_M128i_M128i(__m128i, _mm_mul_epi32); }
71+
void NOINLINE test_test_mullo_epi32() { Ret_M128i_M128i(__m128i, _mm_mullo_epi32); }
72+
void NOINLINE test_test_packus_epi32() { Ret_M128i_M128i(__m128i, _mm_packus_epi32); }
73+
void NOINLINE test_test_stream_load_si128() { Ret_IntPtr(__m128i, _mm_stream_load_si128, __m128i*, 4, 4); }
4374

44-
Ret_M128i_M128i_Tint(__m128i, _mm_blend_epi16);
45-
Ret_M128d_M128d_Tint(__m128d, _mm_blend_pd);
46-
Ret_M128_M128_Tint(__m128, _mm_blend_ps);
47-
Ret_M128i_M128i_M128i(__m128i, _mm_blendv_epi8);
48-
Ret_M128d_M128d_M128d(__m128d, _mm_blendv_pd);
49-
Ret_M128_M128_M128(__m128, _mm_blendv_ps);
50-
Ret_M128i_M128i(__m128i, _mm_cmpeq_epi64);
51-
Ret_M128i(__m128i, _mm_cvtepi16_epi32);
52-
Ret_M128i(__m128i, _mm_cvtepi16_epi64);
53-
Ret_M128i(__m128i, _mm_cvtepi32_epi64);
54-
Ret_M128i(__m128i, _mm_cvtepi8_epi16);
55-
Ret_M128i(__m128i, _mm_cvtepi8_epi32);
56-
Ret_M128i(__m128i, _mm_cvtepi8_epi64);
57-
Ret_M128i(__m128i, _mm_cvtepu16_epi32);
58-
Ret_M128i(__m128i, _mm_cvtepu16_epi64);
59-
Ret_M128i(__m128i, _mm_cvtepu32_epi64);
60-
Ret_M128i(__m128i, _mm_cvtepu8_epi16);
61-
Ret_M128i(__m128i, _mm_cvtepu8_epi32);
62-
Ret_M128i(__m128i, _mm_cvtepu8_epi64);
75+
void NOINLINE test_dp_pd() {
76+
bool oldTestNaNBits = testNaNBits;
6377
testNaNBits = false;
6478
Ret_M128d_M128d_Tint(__m128d, _mm_dp_pd);
79+
testNaNBits = oldTestNaNBits;
80+
}
81+
void NOINLINE test_dp_ps() {
82+
bool oldTestNaNBits = testNaNBits;
83+
testNaNBits = false;
6584
Ret_M128_M128_Tint(__m128, _mm_dp_ps); // _mm_dp_ps emulation does not match NaN bit selection rules (seems to be unspecified)
66-
testNaNBits = true;
67-
Ret_M128i_Tint(int, _mm_extract_epi32);
68-
Ret_M128i_Tint(int64_t, _mm_extract_epi64);
69-
Ret_M128i_Tint(int, _mm_extract_epi8);
70-
Ret_M128_Tint(float, _mm_extract_ps);
71-
Ret_M128i_int_Tint(__m128i, _mm_insert_epi32);
72-
Ret_M128i_int_Tint(__m128i, _mm_insert_epi64);
73-
Ret_M128_M128_Tint(__m128, _mm_insert_ps);
74-
Ret_M128i_M128i(__m128i, _mm_max_epi32);
75-
Ret_M128i_M128i(__m128i, _mm_max_epi8);
76-
Ret_M128i_M128i(__m128i, _mm_max_epu16);
77-
Ret_M128i_M128i(__m128i, _mm_max_epu32);
78-
Ret_M128i_M128i(__m128i, _mm_min_epi32);
79-
Ret_M128i_M128i(__m128i, _mm_min_epi8);
80-
Ret_M128i_M128i(__m128i, _mm_min_epu16);
81-
Ret_M128i_M128i(__m128i, _mm_min_epu32);
82-
Ret_M128i(__m128i, _mm_minpos_epu16);
83-
Ret_M128i_M128i_Tint(__m128i, _mm_mpsadbw_epu8);
84-
Ret_M128i_M128i(__m128i, _mm_mul_epi32);
85-
Ret_M128i_M128i(__m128i, _mm_mullo_epi32);
86-
Ret_M128i_M128i(__m128i, _mm_packus_epi32);
87-
Ret_IntPtr(__m128i, _mm_stream_load_si128, __m128i*, 4, 4);
85+
testNaNBits = oldTestNaNBits;
86+
}
87+
88+
void NOINLINE test_test_all_ones() {
8889
Ret_M128i(int, _mm_test_all_ones);
8990
printf("_mm_test_all_ones(0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
9091
printf("_mm_test_all_ones(0xFFFFFFFFFFFFFFFEull): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull)));
9192
printf("_mm_test_all_ones(0): %d\n", _mm_test_all_ones(_mm_set1_epi64x(0)));
93+
}
94+
95+
void NOINLINE test_test_all_zeros() {
9296
Ret_M128i_M128i(int, _mm_test_all_zeros);
9397
printf("_mm_test_all_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
9498
printf("_mm_test_all_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
9599
printf("_mm_test_all_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_all_zeros(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
100+
}
101+
102+
void NOINLINE test_test_mix_ones_zeros() {
96103
Ret_M128i_M128i(int, _mm_test_mix_ones_zeros);
97104
printf("_mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
98105
printf("_mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
99106
printf("_mm_test_mix_ones_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_test_mix_ones_zeros(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
107+
}
108+
109+
void NOINLINE test_testc() {
100110
Ret_M128i_M128i(int, _mm_testc_si128);
101111
printf("_mm_testc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
102112
printf("_mm_testc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
103113
printf("_mm_testc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testc_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
114+
}
115+
116+
void NOINLINE test_testnzc() {
104117
Ret_M128i_M128i(int, _mm_testnzc_si128);
105118
printf("_mm_testnzc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
106119
printf("_mm_testnzc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
107120
printf("_mm_testnzc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testnzc_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
121+
}
122+
123+
void NOINLINE test_testz() {
108124
Ret_M128i_M128i(int, _mm_testz_si128);
109125
printf("_mm_testz_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
110126
printf("_mm_testz_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0xFFFFFFFFFFFFFFFEull), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
111127
printf("_mm_testz_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n", _mm_testz_si128(_mm_set1_epi64x(0), _mm_set1_epi64x(0xFFFFFFFFFFFFFFFFull)));
112128
}
129+
130+
int main() {
131+
assert(numInterestingFloats % 4 == 0);
132+
assert(numInterestingInts % 4 == 0);
133+
assert(numInterestingDoubles % 4 == 0);
134+
135+
test_ceil_pd();
136+
test_ceil_ps();
137+
test_ceil_sd();
138+
test_ceil_ss();
139+
test_floor_pd();
140+
test_floor_ps();
141+
test_floor_sd();
142+
test_floor_ss();
143+
test_round_pd();
144+
test_round_ps();
145+
test_round_sd();
146+
test_round_ss();
147+
test_blend_epi16();
148+
test_blend_pd();
149+
test_blend_ps();
150+
test_blendv_epi8();
151+
test_blendv_pd();
152+
test_blendv_ps();
153+
test_cvtepi16_epi32();
154+
test_cvtepi16_epi64();
155+
test_cvtepi32_epi64();
156+
test_cvtepi8_epi16();
157+
test_cvtepi8_epi32();
158+
test_cvtepi8_epi64();
159+
test_cvtepu16_epi32();
160+
test_cvtepu16_epi64();
161+
test_cvtepu32_epi64();
162+
test_cvtepu8_epi16();
163+
test_cvtepu8_epi32();
164+
test_cvtepu8_epi64();
165+
test_extract_epi32();
166+
test_extract_epi64();
167+
test_extract_epi8();
168+
test_extract_ps();
169+
test_insert_epi32();
170+
test_insert_epi64();
171+
test_insert_ps();
172+
test_max_epi32();
173+
test_max_epi8();
174+
test_max_epu16();
175+
test_max_epu32();
176+
test_min_epi32();
177+
test_min_epi8();
178+
test_min_epu16();
179+
test_min_epu32();
180+
test_test_cmpeq_epi64();
181+
test_test_minpos_epu16();
182+
test_test_mpsadbw_epu8();
183+
test_testmul_epi32();
184+
test_test_mullo_epi32();
185+
test_test_packus_epi32();
186+
test_test_stream_load_si128();
187+
test_dp_pd();
188+
test_dp_ps();
189+
test_test_all_ones();
190+
test_test_all_zeros();
191+
test_test_mix_ones_zeros();
192+
test_testc();
193+
test_testnzc();
194+
test_testz();
195+
}

0 commit comments

Comments
 (0)