@@ -256,46 +256,25 @@ namespace GSScanlineConstantData
256
256
// Constants shared by all threads (to reduce cache misses)
257
257
struct alignas (64 ) GSScanlineConstantData256B
258
258
{
259
- alignas (32 ) u8 m_test[16 ][8 ] = {
260
- {0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 },
261
- {0xff , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 },
262
- {0xff , 0xff , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 },
263
- {0xff , 0xff , 0xff , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 },
264
- {0xff , 0xff , 0xff , 0xff , 0x00 , 0x00 , 0x00 , 0x00 },
265
- {0xff , 0xff , 0xff , 0xff , 0xff , 0x00 , 0x00 , 0x00 },
266
- {0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0x00 , 0x00 },
267
- {0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0x00 },
268
- {0x00 , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff },
269
- {0x00 , 0x00 , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff },
270
- {0x00 , 0x00 , 0x00 , 0xff , 0xff , 0xff , 0xff , 0xff },
271
- {0x00 , 0x00 , 0x00 , 0x00 , 0xff , 0xff , 0xff , 0xff },
272
- {0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0xff , 0xff , 0xff },
273
- {0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0xff , 0xff },
274
- {0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0xff },
275
- {0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 },
259
+ // All AVX processors support unaligned access with little to no penalty as long as you don't cross a cache line.
260
+ // Take advantage of that to store single vectors that we index with single-element alignment
261
+ alignas (32 ) u8 m_test[24 ] = {
262
+ 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
263
+ 0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff , 0xff ,
264
+ 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
276
265
};
277
- alignas (32 ) float m_shift[9 ][8 ] = {
278
- { 8 .0f , 8 .0f , 8 .0f , 8 .0f , 8 .0f , 8 .0f , 8 .0f , 8 .0f },
279
- { 0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f , 6 .0f , 7 .0f },
280
- { -1 .0f , 0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f , 6 .0f },
281
- { -2 .0f , -1 .0f , 0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f },
282
- { -3 .0f , -2 .0f , -1 .0f , 0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f },
283
- { -4 .0f , -3 .0f , -2 .0f , -1 .0f , 0 .0f , 1 .0f , 2 .0f , 3 .0f },
284
- { -5 .0f , -4 .0f , -3 .0f , -2 .0f , -1 .0f , 0 .0f , 1 .0f , 2 .0f },
285
- { -6 .0f , -5 .0f , -4 .0f , -3 .0f , -2 .0f , -1 .0f , 0 .0f , 1 .0f },
286
- { -7 .0f , -6 .0f , -5 .0f , -4 .0f , -3 .0f , -2 .0f , -1 .0f , 0 .0f },
266
+ float m_log2_coef[4 ] = {};
267
+ alignas (64 ) float m_shift[16 ] = {
268
+ 8 .0f , -7 .0f , -6 .0f , -5 .0f , -4 .0f , -3 .0f , -2 .0f , -1 .0f ,
269
+ 0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f , 6 .0f , 7 .0f ,
287
270
};
288
- alignas (32 ) float m_log2_coef[4 ][8 ] = {};
289
271
290
272
constexpr GSScanlineConstantData256B ()
291
273
{
292
274
using namespace GSScanlineConstantData ;
293
275
for (size_t n = 0 ; n < std::size (log2_coef); ++n)
294
276
{
295
- for (size_t i = 0 ; i < 8 ; ++i)
296
- {
297
- m_log2_coef[n][i] = log2_coef[n];
298
- }
277
+ m_log2_coef[n] = log2_coef[n];
299
278
}
300
279
}
301
280
};
0 commit comments