Skip to content

Commit c45d38b

Browse files
committed Dec 8, 2021
Smaller shuffle mask
1 parent fe35271 commit c45d38b

File tree

1 file changed

+8
-37
lines changed

1 file changed

+8
-37
lines changed
 

‎keyset/keyset_arm64.s

+8 -37
Original file line number | Diff line number | Diff line change
@@ -90,11 +90,12 @@ safeload:
90 90
// key and then shuffle the key forward in the register. We can shuffle and
91 91
// pad with zeroes at the same time to avoid having to also blend (as load
92 92
// does).
93-
ADD R3, R2, R2
94-
SUB $16, R2, R2
93+
MOVD $16, R12
94+
SUB R3, R12, R12
95+
SUB R12, R2, R2
95 96
VLD1 (R2), [V0.B16]
96 97
MOVD $shuffle_masks<>(SB), R10
97-
ADD R3<<4, R10, R10
98+
ADD R12, R10, R10
98 99
VLD1 (R10), [V2.B16]
99 100
VTBL V2.B16, [V0.B16, V1.B16], V3.B16
100 101
JMP loop
@@ -135,38 +136,8 @@ DATA blend_masks<>+256(SB)/8, $0x0706050403020100
135 136
DATA blend_masks<>+264(SB)/8, $0x0F0E0D0C0B0A0908
136 137
GLOBL blend_masks<>(SB), RODATA|NOPTR, $272
137 138

138-
DATA shuffle_masks<>+0(SB)/8, $0x1010101010101010
139-
DATA shuffle_masks<>+8(SB)/8, $0x1010101010101010
140-
DATA shuffle_masks<>+16(SB)/8, $0x101010101010100F
139+
DATA shuffle_masks<>+0(SB)/8, $0x0706050403020100
140+
DATA shuffle_masks<>+8(SB)/8, $0x0F0E0D0C0B0A0908
141+
DATA shuffle_masks<>+16(SB)/8, $0x1010101010101010
141 142
DATA shuffle_masks<>+24(SB)/8, $0x1010101010101010
142-
DATA shuffle_masks<>+32(SB)/8, $0x1010101010100F0E
143-
DATA shuffle_masks<>+40(SB)/8, $0x1010101010101010
144-
DATA shuffle_masks<>+48(SB)/8, $0x10101010100F0E0D
145-
DATA shuffle_masks<>+56(SB)/8, $0x1010101010101010
146-
DATA shuffle_masks<>+64(SB)/8, $0x101010100F0E0D0C
147-
DATA shuffle_masks<>+72(SB)/8, $0x1010101010101010
148-
DATA shuffle_masks<>+80(SB)/8, $0x1010100F0E0D0C0B
149-
DATA shuffle_masks<>+88(SB)/8, $0x1010101010101010
150-
DATA shuffle_masks<>+96(SB)/8, $0x10100F0E0D0C0B0A
151-
DATA shuffle_masks<>+104(SB)/8, $0x1010101010101010
152-
DATA shuffle_masks<>+112(SB)/8, $0x100F0E0D0C0B0A09
153-
DATA shuffle_masks<>+120(SB)/8, $0x1010101010101010
154-
DATA shuffle_masks<>+128(SB)/8, $0x0F0E0D0C0B0A0908
155-
DATA shuffle_masks<>+136(SB)/8, $0x1010101010101010
156-
DATA shuffle_masks<>+144(SB)/8, $0x0E0D0C0B0A090807
157-
DATA shuffle_masks<>+152(SB)/8, $0x101010101010100F
158-
DATA shuffle_masks<>+160(SB)/8, $0x0D0C0B0A09080706
159-
DATA shuffle_masks<>+168(SB)/8, $0x1010101010100F0E
160-
DATA shuffle_masks<>+176(SB)/8, $0x0C0B0A0908070605
161-
DATA shuffle_masks<>+184(SB)/8, $0x10101010100F0E0D
162-
DATA shuffle_masks<>+192(SB)/8, $0x0B0A090807060504
163-
DATA shuffle_masks<>+200(SB)/8, $0x101010100F0E0D0C
164-
DATA shuffle_masks<>+208(SB)/8, $0x0A09080706050403
165-
DATA shuffle_masks<>+216(SB)/8, $0x1010100F0E0D0C0B
166-
DATA shuffle_masks<>+224(SB)/8, $0x0908070605040302
167-
DATA shuffle_masks<>+232(SB)/8, $0x10100F0E0D0C0B0A
168-
DATA shuffle_masks<>+240(SB)/8, $0x0807060504030201
169-
DATA shuffle_masks<>+248(SB)/8, $0x100F0E0D0C0B0A09
170-
DATA shuffle_masks<>+256(SB)/8, $0x0706050403020100
171-
DATA shuffle_masks<>+264(SB)/8, $0x0F0E0D0C0B0A0908
172-
GLOBL shuffle_masks<>(SB), RODATA|NOPTR, $272
143+
GLOBL shuffle_masks<>(SB), RODATA|NOPTR, $32

0 commit comments

Comments
 (0)