Skip to content

Commit e00900b

Browse files
authored
Merge pull request #15 from jw3126/functional
Add functional variants.
2 parents 73adf98 + f49cdfc commit e00900b

File tree

6 files changed

+233
-47
lines changed

6 files changed

+233
-47
lines changed

src/Random123.jl

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ using RandomNumbers
2121
export set_counter!
2222
include("common.jl")
2323

24-
export Threefry2x, Threefry4x
24+
export Threefry2x, Threefry4x, threefry
2525
include("threefry.jl")
2626

27-
export Philox2x, Philox4x
27+
export Philox2x, Philox4x, philox
2828
include("philox.jl")
2929

3030
export R123_USE_AESNI
@@ -46,8 +46,8 @@ catch e
4646
end
4747

4848
@static if R123_USE_AESNI
49-
export AESNI1x, AESNI4x
50-
export ARS1x, ARS4x
49+
export AESNI1x, AESNI4x, aesni
50+
export ARS1x, ARS4x, ars
5151
include("./aesni_common.jl")
5252
include("./aesni.jl")
5353
include("./ars.jl")

src/aesni.jl

+39-14
Original file line numberDiff line numberDiff line change
@@ -201,26 +201,51 @@ copy(src::AESNI4x) = copyto!(AESNI4x(), src)
201201
==(r1::AESNI4x, r2::AESNI4x) = unsafe_compare(r1, r2, UInt128, 2) &&
202202
r1.key == r2.key && r1.p == r2.p
203203

204-
function aesni1xm128i(input::__m128i, key::AESNIKey)
205-
x = key.key1 ⊻ input
206-
x = _aes_enc(x, key.key2)
207-
x = _aes_enc(x, key.key3)
208-
x = _aes_enc(x, key.key4)
209-
x = _aes_enc(x, key.key5)
210-
x = _aes_enc(x, key.key6)
211-
x = _aes_enc(x, key.key7)
212-
x = _aes_enc(x, key.key8)
213-
x = _aes_enc(x, key.key9)
214-
x = _aes_enc(x, key.key10)
215-
x = _aes_enc_last(x, key.key11)
204+
# Gather the fields `key1` … `key11` of the generator's `key` into an 11-tuple,
# in field-number order.
function get_key__m128i(o::Union{AESNI1x, AESNI4x})::NTuple{11, __m128i}
    schedule = o.key
    return (
        schedule.key1, schedule.key2, schedule.key3, schedule.key4,
        schedule.key5, schedule.key6, schedule.key7, schedule.key8,
        schedule.key9, schedule.key10, schedule.key11,
    )
end
208+
# Wrap the generator's counter field in a one-element tuple:
# `ctr1` for `AESNI4x`, `ctr` for `AESNI1x`.
function get_ctr__m128i(o::AESNI4x)::Tuple{__m128i}
    return (o.ctr1,)
end
function get_ctr__m128i(o::AESNI1x)::Tuple{__m128i}
    return (o.ctr,)
end
210+
# Same tuple as `get_key__m128i(o)`, with every element passed through `UInt128`.
function get_key(o::Union{AESNI1x, AESNI4x})::NTuple{11,UInt128}
    raw_keys = get_key__m128i(o)
    return map(UInt128, raw_keys)
end
211+
# Same tuple as `get_ctr__m128i(o)`, with its single element passed through `UInt128`.
function get_ctr(o::Union{AESNI1x, AESNI4x})::Tuple{UInt128}
    raw_ctr = get_ctr__m128i(o)
    return map(UInt128, raw_ctr)
end
212+
213+
# Transform one counter block with an 11-element key tuple and return the result
# wrapped in a one-element tuple.
#
# Steps, in order: combine the counter with `key[1]` via `⊻`, apply `_aes_enc`
# with `key[2]` through `key[10]`, then apply `_aes_enc_last` with `key[11]`.
# Neither argument is modified; the only outputs are the returned tuple.
@inline function aesni(key::NTuple{11,__m128i}, ctr::Tuple{__m128i})::Tuple{__m128i}
    key1, key2, key3, key4, key5, key6, key7, key8, key9, key10, key11 = key
    ctr1 = only(ctr)
    # The infix operator is required here: two adjacent identifiers with nothing
    # between them are not a valid Julia expression.
    x = key1 ⊻ ctr1
    x = _aes_enc(x, key2)
    x = _aes_enc(x, key3)
    x = _aes_enc(x, key4)
    x = _aes_enc(x, key5)
    x = _aes_enc(x, key6)
    x = _aes_enc(x, key7)
    x = _aes_enc(x, key8)
    x = _aes_enc(x, key9)
    x = _aes_enc(x, key10)
    x = _aes_enc_last(x, key11)
    return (x,)
end
229+
230+
"""
231+
aesni(key::NTuple{11,UInt128}, ctr::Tuple{UInt128})::Tuple{UInt128}
232+
233+
Functional variant of [`AESNI1x`](@ref) and [`AESNI4x`](@ref).
234+
This function is free of mutability and side effects.
235+
"""
236+
@inline function aesni(key::NTuple{11,UInt128}, ctr::Tuple{UInt128})::Tuple{UInt128}
    # Convert both tuples to `__m128i`, delegate to the `__m128i` method,
    # and convert the resulting one-element tuple back to `UInt128`.
    block = aesni(map(__m128i, key), map(__m128i, ctr))
    return map(UInt128, block)
end
241+
217242

218243
# Recompute `r.x` from the generator's key and counter via the functional `aesni`,
# then return it as a one-element `UInt128` tuple.
@inline function random123_r(r::AESNI1x)
    round_keys = get_key__m128i(r)
    counter = get_ctr__m128i(r)
    r.x = only(aesni(round_keys, counter))
    return (UInt128(r.x),)
end
222247

223248
# Recompute `r.x` from the generator's key and counter via the functional `aesni`,
# then return `split_uint` of its `UInt128` value with element type `UInt32`.
@inline function random123_r(r::AESNI4x)
    round_keys = get_key__m128i(r)
    counter = get_ctr__m128i(r)
    r.x = only(aesni(round_keys, counter))
    return split_uint(UInt128(r.x), UInt32)
end

src/ars.jl

+35-8
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,15 @@ copy(src::ARS4x{R}) where R = ARS4x{R}(src.x, src.ctr1, src.key, src.p)
9898

9999
==(r1::ARS4x{R}, r2::ARS4x{R}) where R = unsafe_compare(r1, r2, UInt128, 3) && r1.p ≡ r2.p
100100

101-
@generated function ars1xm128i(r::Union{ARS1x{R}, ARS4x{R}}) where R
101+
function expr_ars1xm128i(expr_key, expr_ctr, R)
102102
@assert R isa Int && 1 ≤ R ≤ 10
103103
rounds = [quote
104104
kk += kweyl
105105
v = _aes_enc(v, kk)
106106
end for _ in 2:R]
107-
ctr = :(r.ctr)
108-
if r <: ARS4x
109-
ctr.args[2] = :(:ctr1)
110-
end
111107
quote
112-
ctr = $ctr
113-
key = r.key
108+
ctr = $(expr_ctr)
109+
key = $(expr_key)
114110
kweyl = __m128i(0xbb67ae8584caa73b, 0x9e3779b97f4a7c15)
115111
kk = key
116112
v = ctr ⊻ kk
@@ -122,12 +118,43 @@ copy(src::ARS4x{R}) where R = ARS4x{R}(src.x, src.ctr1, src.key, src.p)
122118
end
123119
end
124120

121+
# Generated wrapper: choose the counter field expression from the generator type
# and hand it, together with `r.key`, to `expr_ars1xm128i` to build the body.
@generated function ars1xm128i(r::Union{ARS1x{R}, ARS4x{R}}) where R
    # Inside a generated function `r` is bound to the argument's type.
    counter_expr =
        r <: ARS1x ? :(r.ctr) :
        r <: ARS4x ? :(r.ctr1) :
        :(error("Unreachable"))
    return expr_ars1xm128i(:(r.key), counter_expr, R)
end
132+
133+
# Generated functional form: the body built by `expr_ars1xm128i` reads
# `only(key)` and `only(ctr)`, and its value is wrapped in a one-element tuple.
@generated function ars(key::Tuple{__m128i}, ctr::Tuple{__m128i}, ::Val{R})::Tuple{__m128i} where {R}
    body = expr_ars1xm128i(:(only(key)), :(only(ctr)), R)
    return :(($body,))
end
136+
137+
"""
138+
ars(key::Tuple{UInt128}, ctr::Tuple{UInt128}, rounds::Val{R})::Tuple{UInt128} where {R}
139+
140+
Functional variant of [`ARS1x`](@ref) and [`ARS4x`](@ref).
141+
This function is free of mutability and side effects.
142+
"""
143+
function ars(key::Tuple{UInt128}, ctr::Tuple{UInt128}, rounds::Val{R})::Tuple{UInt128} where {R}
    # Convert both tuples to `__m128i`, delegate to the `__m128i` method,
    # and convert the resulting one-element tuple back to `UInt128`.
    block = ars(map(__m128i, key), map(__m128i, ctr), rounds)
    return map(UInt128, block)
end
148+
149+
# One-element tuple holding `r.key` converted with `UInt128`.
function get_key(r::Union{ARS1x, ARS4x})
    return (UInt128(r.key),)
end
150+
# One-element tuple holding the counter field converted with `UInt128`:
# `ctr` for `ARS1x`, `ctr1` for `ARS4x`.
function get_ctr(r::ARS1x)
    return (UInt128(r.ctr),)
end
function get_ctr(r::ARS4x)
    return (UInt128(r.ctr1),)
end
152+
125153
# Store the result of `ars1xm128i(r)` in `r.x` and return it as a
# one-element `UInt128` tuple.
@inline function random123_r(r::ARS1x{R}) where R
    r.x = ars1xm128i(r)
    return (UInt128(r.x),)
end
129157

130-
131158
@inline function random123_r(r::ARS4x{R}) where R
132159
r.x = ars1xm128i(r)
133160
split_uint(UInt128(r.x), UInt32)

src/philox.jl

+27-5
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,24 @@ end
8989
key + PHILOX_W_0(T)
9090
end
9191

92+
# Tuple views of a `Philox2x` generator's state: a 1-tuple with `key`
# and a 2-tuple with `ctr1`, `ctr2`.
@inline function get_key(r::Philox2x)
    return (r.key,)
end
@inline function get_ctr(r::Philox2x)
    return (r.ctr1, r.ctr2)
end
94+
9295
# Run the functional `philox` on the generator's key and counter tuples, store the
# two outputs in `r.x1`, `r.x2`, and return the tuple produced by `philox`.
@inline function random123_r(r::Philox2x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    out = philox(get_key(r), get_ctr(r), Val(R))
    r.x1, r.x2 = out
    return out
end
98+
99+
"""
100+
philox(key::NTuple{1,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T}
101+
philox(key::NTuple{2,T}, ctr::NTuple{4,T}, ::Val{R})::NTuple{4,T}
102+
103+
Functional variant of [`Philox2x`](@ref) and [`Philox4x`](@ref).
104+
Produces a pseudorandom output of type `T = UInt64` or `T = UInt32` from the inputs.
105+
This function is free of mutability and side effects.
106+
"""
107+
@inline function philox(key_::Tuple{T}, ctr::NTuple{2,T}, ::Val{R}) where {T,R}
108+
key = first(key_)
109+
ctr1, ctr2 = ctr
94110
if R > 0 ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
95111
if R > 1 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
96112
if R > 2 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
@@ -107,7 +123,7 @@ end
107123
if R > 13 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
108124
if R > 14 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
109125
if R > 15 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
110-
r.x1, r.x2 = ctr1, ctr2
126+
ctr1, ctr2
111127
end
112128

113129
"""
@@ -183,9 +199,15 @@ end
183199
key1 + PHILOX_W_0(T), key2 + PHILOX_W_1(T)
184200
end
185201

202+
# Tuple views of a `Philox4x` generator's state: a 4-tuple with `ctr1` … `ctr4`
# and a 2-tuple with `key1`, `key2`.
@inline function get_ctr(r::Philox4x)
    return (r.ctr1, r.ctr2, r.ctr3, r.ctr4)
end
@inline function get_key(r::Philox4x)
    return (r.key1, r.key2)
end
186204
# Run the functional `philox` on the generator's key and counter tuples, store the
# four outputs in `r.x1` … `r.x4`, and return the tuple produced by `philox`.
@inline function random123_r(r::Philox4x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    out = philox(get_key(r), get_ctr(r), Val(R))
    r.x1, r.x2, r.x3, r.x4 = out
    return out
end
207+
208+
@inline function philox(key::NTuple{2,T}, ctr::NTuple{4,T}, ::Val{R}) where {T <:Union{UInt32, UInt64}, R}
209+
ctr1, ctr2, ctr3, ctr4 = ctr
210+
key1, key2 = key
189211
if R > 0
190212
ctr1, ctr2, ctr3, ctr4 = philox4x_round(ctr1, ctr2, ctr3, ctr4, key1, key2);
191213
end
@@ -249,5 +271,5 @@ end
249271
key1, key2 = philox4x_bumpkey(key1, key2);
250272
ctr1, ctr2, ctr3, ctr4 = philox4x_round(ctr1, ctr2, ctr3, ctr4, key1, key2);
251273
end
252-
r.x1, r.x2, r.x3, r.x4 = ctr1, ctr2, ctr3, ctr4
274+
ctr1, ctr2, ctr3, ctr4
253275
end

src/threefry.jl

+27-14
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,25 @@ copy(src::Threefry2x{T, R}) where {T, R} = Threefry2x{T, R}(src.x1, src.x2, src.
116116

117117
==(r1::Threefry2x{T, R}, r2::Threefry2x{T, R}) where {T, R} = unsafe_compare(r1, r2, T, 6) && r1.p == r2.p
118118

119+
# Tuple views of a `Threefry2x` generator's state: `(key1, key2)` and `(ctr1, ctr2)`.
@inline function get_key(r::Threefry2x)
    return (r.key1, r.key2)
end
@inline function get_ctr(r::Threefry2x)
    return (r.ctr1, r.ctr2)
end
119121
# Run the functional `threefry` on the generator's key and counter tuples, store the
# two outputs in `r.x1`, `r.x2`, and return the tuple produced by `threefry`.
@inline function random123_r(r::Threefry2x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    out = threefry(get_key(r), get_ctr(r), Val(R))
    r.x1, r.x2 = out
    return out
end
124+
125+
"""
126+
threefry(key::NTuple{2,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T}
127+
threefry(key::NTuple{4,T}, ctr::NTuple{4,T}, ::Val{R})::NTuple{4,T}
128+
129+
Functional variant of [`Threefry2x`](@ref) and [`Threefry4x`](@ref).
130+
Produces a pseudorandom output of type `T = UInt64` or `T = UInt32` from the inputs.
131+
This function is free of mutability and side effects.
132+
"""
133+
@inline function threefry(key::NTuple{2,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T} where {T <: Union{UInt32, UInt64}, R}
120134
ks2 = SKEIN_KS_PARITY(T)
121-
ks0 = r.key1
122-
x0 = r.ctr1
135+
x0,x1 = ctr
136+
ks0,ks1 = key
123137
ks2 ⊻= ks0
124-
ks1 = r.key2
125-
x1 = r.ctr2
126138
ks2 ⊻= ks1
127139
x0 += ks0
128140
x1 += ks1
@@ -191,7 +203,7 @@ copy(src::Threefry2x{T, R}) where {T, R} = Threefry2x{T, R}(src.x1, src.x2, src.
191203
x0 += ks2; x1 += ks0;
192204
x1 += 8 % T;
193205
end
194-
r.x1, r.x2 = x0, x1
206+
x0, x1
195207
end
196208

197209
"""
@@ -257,19 +269,20 @@ copy(src::Threefry4x{T, R}) where {T, R} = Threefry4x{T, R}(src.x1, src.x2, src.
257269

258270
==(r1::Threefry4x{T, R}, r2::Threefry4x{T, R}) where {T, R} = unsafe_compare(r1, r2, T, 12) && r1.p == r2.p
259271

272+
# Tuple views of a `Threefry4x` generator's state: `(key1, …, key4)` and
# `(ctr1, …, ctr4)`.
@inline function get_key(r::Threefry4x)
    return (r.key1, r.key2, r.key3, r.key4)
end
@inline function get_ctr(r::Threefry4x)
    return (r.ctr1, r.ctr2, r.ctr3, r.ctr4)
end
274+
260275
# Run the functional `threefry` on the generator's key and counter tuples, store the
# four outputs in `r.x1` … `r.x4`, and return the tuple produced by `threefry`.
@inline function random123_r(r::Threefry4x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    out = threefry(get_key(r), get_ctr(r), Val(R))
    r.x1, r.x2, r.x3, r.x4 = out
    return out
end
278+
279+
@inline function threefry(key::NTuple{4,T},ctr::NTuple{4,T}, rounds::Val{R})::NTuple{4,T} where {T <: Union{UInt32, UInt64}, R}
261280
ks4 = SKEIN_KS_PARITY(T)
262-
ks0 = r.key1
263-
x0 = r.ctr1
281+
ks0,ks1,ks2,ks3 = key
282+
x0,x1,x2,x3 = ctr
264283
ks4 ⊻= ks0
265-
ks1 = r.key2
266-
x1 = r.ctr2
267284
ks4 ⊻= ks1
268-
ks2 = r.key3
269-
x2 = r.ctr3
270285
ks4 ⊻= ks2
271-
ks3 = r.key4
272-
x3 = r.ctr4
273286
ks4 ⊻= ks3
274287
x0 += ks0; x1 += ks1; x2 += ks2; x3 += ks3;
275288

@@ -633,5 +646,5 @@ copy(src::Threefry4x{T, R}) where {T, R} = Threefry4x{T, R}(src.x1, src.x2, src.
633646
x0 += ks3; x1 += ks4; x2 += ks0; x3 += ks1;
634647
x3 += 18 % T;
635648
end
636-
r.x1, r.x2, r.x3, r.x4 = x0, x1, x2, x3
649+
x0, x1, x2, x3
637650
end

0 commit comments

Comments
 (0)