Skip to content

Commit aba9094

Browse files
committed
Implement a bias free rand:shuffle algorithm
1 parent 8fcfb4e commit aba9094

File tree

1 file changed

+28
-192
lines changed

1 file changed

+28
-192
lines changed

lib/stdlib/src/rand.erl

Lines changed: 28 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,7 @@ the generator's range:
405405
bytes/1, bytes_s/2,
406406
jump/0, jump/1,
407407
normal/0, normal/2, normal_s/1, normal_s/3,
408-
shuffle1/1, shuffle1_s/2, shuffle2/1, shuffle2_s/2,
409-
shuffle3/1, shuffle3_s/2, shuffle4/1, shuffle4_s/2
408+
shuffle/1, shuffle_s/2
410409
]).
411410

412411
%% Utilities
@@ -1317,16 +1316,15 @@ normal_s(Mean, Variance, State0) when 0 =< Variance ->
13171316
{X, State} = normal_s(State0),
13181317
{Mean + (math:sqrt(Variance) * X), State}.
13191318

1320-
%% -------
13211319

1322-
-spec shuffle1(list()) -> list().
1323-
shuffle1(List) ->
1324-
{ShuffledList, State} = shuffle1_s(List, seed_get()),
1320+
-spec shuffle(list()) -> list().
1321+
shuffle(List) ->
1322+
{ShuffledList, State} = shuffle_s(List, seed_get()),
13251323
_ = seed_put(State),
13261324
ShuffledList.
13271325

1328-
-spec shuffle1_s(list(), state()) -> {list(), state()}.
1329-
shuffle1_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0} = State)
1326+
-spec shuffle_s(list(), state()) -> {list(), state()}.
1327+
shuffle_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0} = State)
13301328
when is_list(List) ->
13311329
case List of
13321330
[] ->
@@ -1335,10 +1333,10 @@ shuffle1_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0} = State)
13351333
{List, State};
13361334
_ ->
13371335
WeakLowBits = maps:get(weak_low_bits, AlgHandler, 0),
1338-
{ShuffledList, R1} = shuffle1_r(List, Next, R0, WeakLowBits, []),
1336+
{ShuffledList, R1} = shuffle_r(List, Next, R0, WeakLowBits, []),
13391337
{ShuffledList, {AlgHandler, R1}}
13401338
end;
1341-
shuffle1_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
1339+
shuffle_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
13421340
when is_list(List), ?MASK(58) =< Mask ->
13431341
case List of
13441342
[] ->
@@ -1348,7 +1346,7 @@ shuffle1_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
13481346
_ ->
13491347
%% Old spec - assume 2 weak low bits
13501348
WeakLowBits = 2,
1351-
{ShuffledList, R1} = shuffle1_r(List, Next, R0, WeakLowBits, []),
1349+
{ShuffledList, R1} = shuffle_r(List, Next, R0, WeakLowBits, []),
13521350
{ShuffledList, {AlgHandler, R1}}
13531351
end.
13541352

@@ -1360,204 +1358,42 @@ shuffle1_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
13601358
%% produces a bias free shuffle.
13611359

13621360
%% Recursion entry point
1363-
shuffle1_r([X, Y], Next, R0, _WeakLowBits, Acc) ->
1361+
shuffle_r([X, Y], Next, R0, _WeakLowBits, Acc) ->
13641362
%% Optimization for 2 elements; the most common case for duplicates
13651363
{V, R1} = Next(R0),
13661364
if
13671365
%% Bit 7 should not be weak in any of the generators
13681366
V band 128 =:= 0 -> {[Y, X | Acc], R1};
13691367
true -> {[X, Y | Acc], R1}
13701368
end;
1371-
shuffle1_r(L, Next, R0, WeakLowBits, Acc) ->
1372-
shuffle1_tag(L, Next, R0, WeakLowBits, Acc, []).
1369+
shuffle_r(L, Next, R0, WeakLowBits, Acc) ->
1370+
shuffle_tag(L, Next, R0, WeakLowBits, Acc, []).
13731371

13741372
%% Tag elements with random integers
1375-
shuffle1_tag([], Next, R, WeakLowBits, Acc, TL) ->
1376-
%% Shuffle1; sort by random tag
1377-
shuffle1_untag(lists:keysort(1, TL), Next, R, WeakLowBits, Acc);
1378-
shuffle1_tag([X | L], Next, R0, WeakLowBits, Acc, TL) ->
1373+
shuffle_tag([], Next, R, WeakLowBits, Acc, TL) ->
1374+
%% Shuffle; sort by random tag
1375+
shuffle_untag(lists:keysort(1, TL), Next, R, WeakLowBits, Acc);
1376+
shuffle_tag([X | L], Next, R0, WeakLowBits, Acc, TL) ->
13791377
{V, R1} = Next(R0),
13801378
T = V bsr WeakLowBits,
1381-
shuffle1_tag(L, Next, R1, WeakLowBits, Acc, [{T,X} | TL]).
1379+
shuffle_tag(L, Next, R1, WeakLowBits, Acc, [{T,X} | TL]).
13821380

13831381
%% Strip the tag integers
1384-
shuffle1_untag([{T,X}, {T,Y} | TL], Next, R, WeakLowBits, Acc) ->
1382+
shuffle_untag([{T,X}, {T,Y} | TL], Next, R, WeakLowBits, Acc) ->
13851383
%% Random number duplicate
1386-
shuffle1_untag(TL, Next, R, WeakLowBits, Acc, [Y, X], T);
1387-
shuffle1_untag([{_,X} | TL], Next, R, WeakLowBits, Acc) ->
1388-
shuffle1_untag(TL, Next, R, WeakLowBits, [X | Acc]);
1389-
shuffle1_untag([], _Next, R, _WeakLowBits, Acc) ->
1384+
shuffle_untag(TL, Next, R, WeakLowBits, Acc, [Y, X], T);
1385+
shuffle_untag([{_,X} | TL], Next, R, WeakLowBits, Acc) ->
1386+
shuffle_untag(TL, Next, R, WeakLowBits, [X | Acc]);
1387+
shuffle_untag([], _Next, R, _WeakLowBits, Acc) ->
13901388
{Acc, R}.
13911389
%%
13921390
%% Collect duplicates
1393-
shuffle1_untag([{T,X} | TL], Next, R, WeakLowBits, Acc, Dups, T) ->
1394-
shuffle1_untag(TL, Next, R, WeakLowBits, Acc, [X | Dups], T);
1395-
shuffle1_untag(TL, Next, R0, WeakLowBits, Acc0, Dups, _T) ->
1396-
%% Shuffle1 the duplicates onto the result
1397-
{Acc1, R1} = shuffle1_r(Dups, Next, R0, WeakLowBits, Acc0),
1398-
shuffle1_untag(TL, Next, R1, WeakLowBits, Acc1).
1399-
1400-
%% -------
1401-
1402-
-spec shuffle2(list()) -> list().
1403-
shuffle2(List) ->
1404-
{ShuffledList, State} = shuffle2_s(List, seed_get()),
1405-
_ = seed_put(State),
1406-
ShuffledList.
1407-
1408-
-spec shuffle2_s(list(), state()) -> {list(), state()}.
1409-
shuffle2_s(List, State)
1410-
when is_list(List) ->
1411-
case List of
1412-
[] ->
1413-
{List, State};
1414-
[_] ->
1415-
{List, State};
1416-
_ ->
1417-
M = maps:from_list(lists:enumerate(List)),
1418-
N = maps:size(M),
1419-
shuffle2_s(M, State, N, [])
1420-
end.
1421-
1422-
%% Classical Fisher-Yates shuffle, a.k.a. the Knuth shuffle.
1423-
%% See the Wikipedia article "Fisher-Yates shuffle".
1424-
%%
1425-
%% This variant uses a map with integer keys as array
1426-
%% and is optimized in that it minimizes map updates
1427-
%% since the high index is never used again, so an overwrite
1428-
%% can be used instead of an exchange.
1429-
1430-
shuffle2_s(M0, State0, N, Acc)
1431-
when is_map(M0), is_integer(N) ->
1432-
if
1433-
N =:= 0 -> {Acc, State0};
1434-
true ->
1435-
X = maps:get(N, M0),
1436-
case uniform_s(N, State0) of
1437-
{N, State1} ->
1438-
shuffle2_s(M0, State1, N - 1, [X | Acc]);
1439-
{K, State1} when is_integer(K) ->
1440-
Y = maps:get(K, M0),
1441-
M1 = maps:update(K, X, M0),
1442-
shuffle2_s(M1, State1, N - 1, [Y | Acc])
1443-
end
1444-
end.
1445-
1446-
%% -------
1447-
1448-
-spec shuffle3(list()) -> list().
1449-
shuffle3(List) ->
1450-
{ShuffledList, State} = shuffle3_s(List, seed_get()),
1451-
_ = seed_put(State),
1452-
ShuffledList.
1453-
1454-
-spec shuffle3_s(list(), state()) -> {list(), state()}.
1455-
shuffle3_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0} = State)
1456-
when is_list(List) ->
1457-
case List of
1458-
[] ->
1459-
{List, State};
1460-
[_] ->
1461-
{List, State};
1462-
_ ->
1463-
WeakLowBits = maps:get(weak_low_bits, AlgHandler, 0),
1464-
T = gb_trees:empty(),
1465-
{ShuffledList, R1} = shuffle3_r(List, Next, R0, WeakLowBits, T),
1466-
{ShuffledList, {AlgHandler, R1}}
1467-
end;
1468-
shuffle3_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
1469-
when is_list(List), ?MASK(58) =< Mask ->
1470-
case List of
1471-
[] ->
1472-
{List, State};
1473-
[_] ->
1474-
{List, State};
1475-
_ ->
1476-
%% Old spec - assume 2 weak low bits
1477-
WeakLowBits = 2,
1478-
T = gb_trees:empty(),
1479-
{ShuffledList, R1} = shuffle3_r(List, Next, R0, WeakLowBits, T),
1480-
{ShuffledList, {AlgHandler, R1}}
1481-
end.
1482-
1483-
%% See the Wikipedia article "Fisher-Yates shuffle", section "Sorting".
1484-
%%
1485-
%% To avoid bias due to duplicate random numbers, a gb_tree
1486-
%% is used to check if a random number has already been used,
1487-
%% and if so generate a new random number.
1488-
%%
1489-
%% Because a gb_tree is sorted no sorting needs to be done,
1490-
%% it is enough to extract the values of the gb_tree that are
1491-
%% ordered in key sort order.
1492-
1493-
shuffle3_r([], _Next, R, _WeakLowBits, T) ->
1494-
{gb_trees:values(T), R};
1495-
shuffle3_r([X | L] , Next, R0, WeakLowBits, T) ->
1496-
{V, R1} = Next(R0),
1497-
K = V bsr WeakLowBits,
1498-
case gb_trees:is_defined(K, T) of
1499-
false ->
1500-
shuffle3_r(L, Next, R1, WeakLowBits, gb_trees:insert(K, X, T));
1501-
true ->
1502-
shuffle3_r([X | L], Next, R1, WeakLowBits, T)
1503-
end.
1504-
1505-
%% -------
1506-
1507-
-spec shuffle4(list()) -> list().
1508-
shuffle4(List) ->
1509-
{ShuffledList, State} = shuffle4_s(List, seed_get()),
1510-
_ = seed_put(State),
1511-
ShuffledList.
1512-
1513-
-spec shuffle4_s(list(), state()) -> {list(), state()}.
1514-
shuffle4_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0} = State)
1515-
when is_list(List) ->
1516-
case List of
1517-
[] ->
1518-
{List, State};
1519-
[_] ->
1520-
{List, State};
1521-
_ ->
1522-
WeakLowBits = maps:get(weak_low_bits, AlgHandler, 0),
1523-
{ShuffledList, R1} = shuffle4_r(List, Next, R0, WeakLowBits, #{}),
1524-
{ShuffledList, {AlgHandler, R1}}
1525-
end;
1526-
shuffle4_s(List, {#{max:=Mask, next:=Next} = AlgHandler, R0} = State)
1527-
when is_list(List), ?MASK(58) =< Mask ->
1528-
case List of
1529-
[] ->
1530-
{List, State};
1531-
[_] ->
1532-
{List, State};
1533-
_ ->
1534-
%% Old spec - assume 2 weak low bits
1535-
WeakLowBits = 2,
1536-
{ShuffledList, R1} = shuffle4_r(List, Next, R0, WeakLowBits, #{}),
1537-
{ShuffledList, {AlgHandler, R1}}
1538-
end.
1539-
1540-
%% See the Wikipedia article "Fisher-Yates shuffle", section "Sorting".
1541-
%%
1542-
%% To avoid bias due to duplicate random numbers, a map
1543-
%% is used to check if a random number has already been used,
1544-
%% and if so generate a new random number.
1545-
%%
1546-
%% Actual sorting is not needed. A map is ordered by key
1547-
%% and therefore it is enough to extract the values of the map.
1548-
%% The internal map key order will do just fine.
1549-
1550-
shuffle4_r([], _Next, R, _WeakLowBits, M) ->
1551-
{maps:values(M), R};
1552-
shuffle4_r([X | L] , Next, R0, WeakLowBits, M) ->
1553-
{V, R1} = Next(R0),
1554-
K = V bsr WeakLowBits,
1555-
case maps:is_key(K, M) of
1556-
true ->
1557-
shuffle4_r([X | L], Next, R1, WeakLowBits, M);
1558-
false ->
1559-
shuffle4_r(L, Next, R1, WeakLowBits, maps:put(K, X, M))
1560-
end.
1391+
shuffle_untag([{T,X} | TL], Next, R, WeakLowBits, Acc, Dups, T) ->
1392+
shuffle_untag(TL, Next, R, WeakLowBits, Acc, [X | Dups], T);
1393+
shuffle_untag(TL, Next, R0, WeakLowBits, Acc0, Dups, _T) ->
1394+
%% Shuffle the duplicates onto the result
1395+
{Acc1, R1} = shuffle_r(Dups, Next, R0, WeakLowBits, Acc0),
1396+
shuffle_untag(TL, Next, R1, WeakLowBits, Acc1).
15611397

15621398
%% =====================================================================
15631399
%% Internal functions

0 commit comments

Comments
 (0)