@@ -39,35 +39,31 @@ struct AlignedColMajor{T} <: LayoutBase{T} end
3939
4040# TODO : cleanup vectorisation
"""
    load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}

Read the elements of `workspace` addressed by `tile`, one scalar at a time, into a
newly created `size[1] × size[2]` `MArray` with element type `T`, and return it.

For each `(i, j)` the tile is shifted by `(i - 1, j - 1)` with `translate`. The shifted
tile's `base` and `offset` are each passed through `linearise` together with the
dimensions of `workspace`; the two results are added and 1 is subtracted to obtain the
linear index that is read from `workspace`.
"""
@inline function load(
    ::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}
) where {T, size}
    # The type parameter `size` shadows `Base.size`, hence the qualified call.
    dims = Base.size(workspace)

    # Left uninitialised on purpose: every entry is written by the loops below.
    fragment = MArray{Tuple{size[1], size[2]}, T}(undef)

    @unroll for j in 1:size[2]
        @unroll for i in 1:size[1]
            shifted = translate(tile, (i - 1, j - 1))
            idx = linearise(shifted.base, dims) + linearise(shifted.offset, dims) - 1

            @inbounds fragment[i, j] = workspace[idx]
        end
    end

    return fragment
end
5957
"""
    store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}

Write `value[i, j]` for every `i in 1:size[1]`, `j in 1:size[2]` into `workspace`, one
scalar at a time.

The destination of each element is found the same way for every `(i, j)`: `tile` is
shifted by `(i - 1, j - 1)` with `translate`, the shifted tile's `base` and `offset`
are each passed through `linearise` together with the dimensions of `workspace`, and
the sum of the two results minus 1 is used as the linear index into `workspace`.
"""
@inline function store!(
    ::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}
) where {T, size}
    # The type parameter `size` shadows `Base.size`, hence the qualified call.
    dims = Base.size(workspace)

    @unroll for j in 1:size[2]
        @unroll for i in 1:size[1]
            shifted = translate(tile, (i - 1, j - 1))
            idx = linearise(shifted.base, dims) + linearise(shifted.offset, dims) - 1

            @inbounds workspace[idx] = value[i, j]
        end
    end
end
0 commit comments