Skip to content

Commit 44c7dc5

Browse files
chore: improve slice_fields function (#88)
Co-authored-by: Tom French <[email protected]>
1 parent 37908a7 commit 44c7dc5

File tree

1 file changed

+122
-87
lines changed

1 file changed

+122
-87
lines changed

src/_string_tools/slice_packed_field.nr

Lines changed: 122 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -330,62 +330,50 @@ global tail_path_multipliers_chunk0: [Field; 32] = [
330330
];
331331

332332
// these path variables describe the location of a limb in an array
333-
// e.g. LAST_LIMB_PATH[5] produces 2^{5-1},
334-
// which is decomposed into 1 0 0 0 0, and the decompositions turned into an array M
335-
// M[4] = 1 i.e. the limbs[4] should contain the last limb
336-
// array extends to 0x200000000 which is 2^33 => 33 31 byte limbs = 1,023 bytes
333+
// e.g. LAST_LIMB_PATH[5] produces 0x00000000f (15 in decimal),
334+
// which is decomposed into ... 0 0 0 0 1 1 1 1, meaning limbs 0, 1, 2, 3 (counting bits from the least-significant end) should contain data
335+
// Each 1 bit in the decomposed array indicates that the corresponding limb should contain valid data
336+
// array extends to 0x3ffffffff which is 2^34 - 1 => 34 31 byte limbs = 1,054 bytes
337337
// this puts a hard limit on the max size of a key that this program supports.
338338
global LAST_LIMB_PATH: [Field; 36] = [
339-
0x000000000, // 0 0 0 0 0 0 0 0 0 <-- edge case because sometimes array index is -1, so we offset by 1 TODO explain better
340-
0x000000000, // 0 0 0 0 0 0 0 0 1
341-
0x000000001, // 0 0 0 0 0 0 0 1 0
342-
0x000000003, // 0 0 0 0 0 0 1 0 0
343-
0x000000007, // 0 0 0 0 0 1 0 0 0
344-
0x00000000f, // 0 0 0 0 1 0 0 0 0
345-
0x00000001f, // 0 0 0 1 0 0 0 0 0
346-
0x00000003f, // 0 0 1 0 0 0 0 0 0
347-
0x00000007f, // 0 1 0 0 0 0 0 0 0
348-
0x0000000ff, // 1 0 0 0 0 0 0 0 0
349-
0x0000001ff, // 0 0 0 0 0 0 0 1 0
350-
0x0000003ff, // 0 0 0 0 0 0 1 0 0
351-
0x0000007ff, // 0 0 0 0 0 1 0 0 0
352-
0x000000fff, // 0 0 0 0 1 0 0 0 0
353-
0x000001fff, // 0 0 0 1 0 0 0 0 0
354-
0x000003fff, // 0 0 1 0 0 0 0 0 0
355-
0x000007fff, // 0 1 0 0 0 0 0 0 0
356-
0x00000ffff, // 0 0 0 0 0 0 0 0 1
357-
0x00001ffff, // 0 0 0 0 0 0 0 1 0
358-
0x00003ffff, // 0 0 0 0 0 0 1 0 0
359-
0x00007ffff, // 0 0 0 0 0 1 0 0 0
360-
0x0000fffff, // 0 0 0 0 1 0 0 0 0
361-
0x0001fffff, // 0 0 0 1 0 0 0 0 0
362-
0x0003fffff, // 0 0 1 0 0 0 0 0 0
363-
0x0007fffff, // 0 1 0 0 0 0 0 0 0
364-
0x000ffffff, // 0 0 0 0 0 0 0 0 1
365-
0x001ffffff, // 0 0 0 0 0 0 0 1 0
366-
0x003ffffff, // 0 0 0 0 0 0 1 0 0
367-
0x007ffffff, // 0 0 0 0 0 1 0 0 0
368-
0x00fffffff, // 0 0 0 0 1 0 0 0 0
369-
0x01fffffff, // 0 0 0 1 0 0 0 0 0
370-
0x03fffffff, // 0 0 1 0 0 0 0 0 0
371-
0x07fffffff, // 0 1 0 0 0 0 0 0 0
339+
0x000000000, // 0 0 0 0 0 0 0 0 0 <-- edge case because sometimes array index is -1, so we offset by 1
340+
0x000000000, // 0 0 0 0 0 0 0 0 0
341+
0x000000001, // 1 0 0 0 0 0 0 0 0
342+
0x000000003, // 1 1 0 0 0 0 0 0 0
343+
0x000000007, // 1 1 1 0 0 0 0 0 0
344+
0x00000000f, // 1 1 1 1 0 0 0 0 0
345+
0x00000001f, // 1 1 1 1 1 0 0 0 0
346+
0x00000003f, // 1 1 1 1 1 1 0 0 0
347+
0x00000007f, // 1 1 1 1 1 1 1 0 0
348+
0x0000000ff, // 1 1 1 1 1 1 1 1 0
349+
0x0000001ff, // 1 1 1 1 1 1 1 1 1
350+
0x0000003ff, // 1 1 1 1 1 1 1 1 1 1
351+
0x0000007ff, // 1 1 1 1 1 1 1 1 1 1 1
352+
0x000000fff, // 1 1 1 1 1 1 1 1 1 1 1 1
353+
0x000001fff, // 1 1 1 1 1 1 1 1 1 1 1 1 1
354+
0x000003fff,
355+
0x000007fff,
356+
0x00000ffff,
357+
0x00001ffff,
358+
0x00003ffff,
359+
0x00007ffff,
360+
0x0000fffff,
361+
0x0001fffff,
362+
0x0003fffff,
363+
0x0007fffff,
364+
0x000ffffff,
365+
0x001ffffff,
366+
0x003ffffff,
367+
0x007ffffff,
368+
0x00fffffff,
369+
0x01fffffff,
370+
0x03fffffff,
371+
0x07fffffff,
372372
0x0ffffffff,
373373
0x1ffffffff,
374374
0x3ffffffff,
375375
];
376376

377-
global INTEGER_UP_TO_62_IS_GREATER_THAN_31: [bool; 63] = [
378-
false, false, false, false, false, false, false, false, false, false, false, false, false,
379-
false, false, false, false, false, false, false, false, false, false, false, false, false,
380-
false, false, false, false, false, false, true, true, true, true, true, true, true, true, true,
381-
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
382-
true, true, true, true, true, true,
383-
];
384-
global NUM_BYTES_MOD_31_IS_ZERO: [bool; 31] = [
385-
true, false, false, false, false, false, false, false, false, false, false, false, false, false,
386-
false, false, false, false, false, false, false, false, false, false, false, false, false,
387-
false, false, false, false,
388-
];
389377
global BYTE_SHIFT: [Field; 32] = [
390378
1,
391379
0x1000000000000000000000000000000000000000000000000000000000000,
@@ -584,20 +572,29 @@ fn divmod_31(numerator: u16) -> (u16, u16) {
584572
let rf = remainder as Field;
585573

586574
// note: these range checks are because we know the denominator is 31
587-
// TODO: need more checks, atm remainder could equal 31
588575
qf.assert_max_bit_size::<14>();
589576
rf.assert_max_bit_size::<5>();
577+
assert(remainder != 31);
590578

591579
// n / d = q
592580
// d * q + r = n
593581
assert(qf * 31 + rf == numerator as Field);
594582
(quotient, remainder)
595583
}
596584

597-
// 5 gates?
585+
/// Given the index of the last limb in a sequence of 31-byte fields, return a path mask that indicates which fields contain actual data.
586+
///
587+
/// The return value is a path mask that indicates which fields contain actual data up until the second to last limb.
588+
///
598589
pub fn get_last_limb_path<let OutputFields: u32>(last_limb_index: Field) -> [u1; OutputFields] {
599-
// TODO we offset by 1 explain why (0 byte length produces 0 - 1 which = invalid array index. we just add 1 and increase array length by 1 to compensate)
600-
let path = LAST_LIMB_PATH[cast_num_to_u32(last_limb_index + 1)]; // 2
590+
// The + 1 offset fixes a special case: when there are 0 bytes of data, last_limb_index becomes -1,
591+
// which would cause an error when trying to access the array. By adding 1, we turn -1 into 0,
592+
// which points to a special "empty" entry in the table. For all other cases,
593+
// the +1 just shifts us to the right spot in the table to get the correct bit pattern.
594+
// Then we offset by 1 again to exclude the last limb, which is calculated separately.
595+
// For example, if last_limb_index is 3, it means there are 4 valid limbs, then path variable is 7 (i.e. 00000111)
596+
// The return value path_valid_output represents the reverse of this number in array form [1, 1, 1, 0, 0, 0, 0, 0]
597+
let path = LAST_LIMB_PATH[cast_num_to_u32(last_limb_index + 1)];
601598
path.to_le_bits::<OutputFields>()
602599
}
603600

@@ -651,35 +648,44 @@ pub fn slice_field(f: Field, num_bytes: u32) -> (Field, Field) {
651648

652649
/**
653650
* @brief Given an array of fields that pack 31 bytes, return an array that slices the packed byte array at a given index for a given number of bytes
654-
* @description Some serious dark black magic nonsense going on here. TODO: document
655651
**/
656652
pub fn slice_fields<let InputFields: u32, let OutputFields: u32>(
657653
data: [Field; InputFields],
658-
start_byte: Field,
659-
num_bytes: Field,
654+
start_byte: Field, // starting byte position
655+
num_bytes: Field, // number of bytes to extract
660656
) -> [Field; OutputFields] {
661657
start_byte.assert_max_bit_size::<16>();
662658
num_bytes.assert_max_bit_size::<16>();
663659
// 3.5
660+
// calculate Starting Position
661+
// start_index: which field contains the starting byte
662+
// start_mod_31: Byte offset within that field (0-30)
663+
// num_underflow_bytes: same as start_mod_31 (bytes to skip in first field)
664664
let (start_index, start_mod_31) = divmod_31(start_byte as u16);
665665
let num_underflow_bytes = start_mod_31;
666666
// 3.5, 7
667+
668+
// num_bytes_div_31: Number of complete 31-byte fields needed
669+
// num_bytes_mod_31: Remaining bytes in the last field
667670
let (num_bytes_div_31, num_bytes_mod_31) = divmod_31(num_bytes as u16);
668671

669672
// 2, 9
670-
let num_bytes_mod_31_is_0 = NUM_BYTES_MOD_31_IS_ZERO[num_bytes_mod_31 as u32];
673+
//num_bytes_mod_31_is_0: True if num_bytes is a multiple of 31
674+
//num_bytes_div_31_is_0: True if num_bytes < 31
675+
let num_bytes_mod_31_is_0 = num_bytes_mod_31 == 0;
671676
// 2, 11
672-
let num_bytes_div_31_is_0 = NUM_BYTES_MOD_31_IS_ZERO[num_bytes_div_31 as u32];
677+
let num_bytes_div_31_is_0 = num_bytes_div_31 == 0;
673678

674-
// 1, 12
675-
let lookup = 62 - num_bytes_div_31_is_0 as Field * num_bytes - start_mod_31 as Field;
676-
std::as_witness(lookup);
679+
// Determines if all bytes can fit within the starting field.
677680
// 3, 15
678681
let bytes_fit_into_limb =
679-
INTEGER_UP_TO_62_IS_GREATER_THAN_31[cast_num_to_u32(lookup)] & num_bytes_div_31_is_0;
682+
start_mod_31 as u32 + cast_num_to_u32(num_bytes) < 31 & num_bytes_div_31_is_0;
680683
std::as_witness(bytes_fit_into_limb as Field);
681684

682685
// 2, 17
686+
// calculate bytes used in first Limb
687+
// if bytes fit: use all num_bytes
688+
// if bytes don't fit: use remaining bytes in first field
683689
let num_unused_bytes_in_start_limb = if bytes_fit_into_limb {
684690
num_bytes
685691
} else {
@@ -689,32 +695,44 @@ pub fn slice_fields<let InputFields: u32, let OutputFields: u32>(
689695
let num_remaining_bytes = num_bytes - num_unused_bytes_in_start_limb;
690696

691697
// 4.5, 21.5
698+
// process remaining bytes
692699
num_remaining_bytes.assert_max_bit_size::<16>();
693700
let mut (num_whole_limbs, num_overflow_bytes) = divmod_31(num_remaining_bytes as u16);
694701
// 44, 65.5
702+
// extracts the tail portion of the starting field (bytes we want to keep) and discards the head.
695703
let (_, tail) = slice_field(data[start_index as u32], num_underflow_bytes as u32);
696704

697705
// 4, 69.5
698-
let int_greater_than_61 =
699-
INTEGER_UP_TO_62_IS_GREATER_THAN_31[(31 + num_overflow_bytes - start_mod_31) as u32];
700-
let extra_head_section = int_greater_than_61 & !bytes_fit_into_limb;
706+
// 31 - start_mod_31 is the number of bytes we need from the first limb, num_overflow_bytes is the number of bytes we need from the last limb
707+
// if the sum is greater than 31, we need to start a new limb at the end. The data from first limb and last limb don't fit into one.
708+
let extra_head_section = 31 - start_mod_31 + num_overflow_bytes > 31 & !bytes_fit_into_limb;
701709

702710
// 1, 70.5
703-
let index_of_output_limb: Field = (num_bytes_div_31 as Field - num_bytes_mod_31_is_0 as Field);
711+
// the index of the last limb that contains data
712+
let index_of_output_limb = num_bytes_div_31 as Field - num_bytes_mod_31_is_0 as Field;
704713
// 5, 75.5
705-
let path_valid_output: [u1; OutputFields] = get_last_limb_path(index_of_output_limb);
714+
// index_of_output_limb is -1 if num_bytes is 0 as num_bytes_div_31 is 0 and num_bytes_mod_31_is_0 is 1
715+
// path_valid_output[i] = 1 means field i should contain actual data, 0 means it should be 0. For example, it could be [1, 1, 0]
716+
717+
let path_valid_output = get_last_limb_path::<OutputFields>(index_of_output_limb);
706718

707719
// 2, 77.5
720+
// For each new limb, we need to combine the end of the previous limb with the start of the new limb,
721+
// so we need to shift the previous limb by the number of bytes it has remaining
708722
let tail_shift = BYTE_SHIFT[cast_num_to_u32(num_unused_bytes_in_start_limb)];
709723

710724
// 51, 128.5
711-
let mut result: [Field; OutputFields] = [0; OutputFields];
725+
// process middle fields
726+
let mut result = [0; OutputFields];
727+
// starting with the remaining bytes from the first limb
712728
let mut previous = tail;
713729
for i in 0..(OutputFields - 1) {
714730
// 0
731+
// 1 if this limb should contain actual data
715732
let slice_valid = path_valid_output[i];
716733
// 1
717-
let data_index = (start_index as u32 + 1 + i);
734+
// the index of the current limb in the input array, +1 because we already processed the first limb in tail
735+
let data_index = (start_index as u32 + i + 1);
718736
// 2, 3
719737
let input_slice = data[data_index];
720738
// 44, 47
@@ -724,39 +742,32 @@ pub fn slice_fields<let InputFields: u32, let OutputFields: u32>(
724742
// 1, 49
725743
result[i] = combined * (slice_valid as Field);
726744
// 2, 51
727-
previous = (tail - previous) * (slice_valid as Field) + previous;
745+
// set to tail if slice_valid is 1, no change if slice_valid is 0
746+
previous = (tail - previous) * slice_valid as Field + previous;
728747
}
729748

730-
// 2, 130.5
749+
// handles the last limb
750+
// number of bytes to take from the last limb: if all bytes fit in one limb (last limb is first limb): use num_bytes + start_mod_31, else num_overflow_bytes
731751
let slice_size = (bytes_fit_into_limb as Field) * (num_bytes + start_mod_31 as Field)
732752
+ num_overflow_bytes as Field;
733-
734-
// 1, 131.5
753+
// use_previous_for_last_limb = does `previous` contain all the data we need for last limb?
735754
let use_previous_for_last_limb: Field =
736755
extra_head_section as Field + bytes_fit_into_limb as Field;
737-
738-
// 1, 132.5
739756
let index_of_overflow_limb = start_index + num_whole_limbs + 1;
740-
// 2, 134.5
741757
let last_limb_from_data = data[index_of_overflow_limb as u32];
742-
// 2, 136.5
758+
// if use_previous_for_last_limb = 1: slice_source = previous
759+
// if use_previous_for_last_limb = 0: slice_source = last_limb_from_data
743760
let slice_source =
744761
(previous - last_limb_from_data) * use_previous_for_last_limb + last_limb_from_data;
745-
746-
// 44, 180.5
747762
let (head, _) = slice_field(slice_source, cast_num_to_u32(slice_size));
748763

749-
// 3, 183.5
764+
// assemble the final field
750765
let previous_shift = BYTE_SHIFT[31 - num_overflow_bytes as u32]; // could save 1 gate by making different shift table
751-
// 2, 185.5
752766
let last_limb_shift = BYTE_SHIFT[num_bytes_mod_31 as u32];
753-
// 1, 186.5
754767
let mut last_limb = (previous * previous_shift);
755768
std::as_witness(last_limb);
756-
// 1, 187.5
757769
last_limb = last_limb * (-use_previous_for_last_limb) + last_limb + head;
758770
std::as_witness(last_limb);
759-
// 1, 188.5
760771
last_limb = last_limb * last_limb_shift;
761772
std::as_witness(last_limb);
762773

@@ -770,13 +781,40 @@ pub fn slice_fields<let InputFields: u32, let OutputFields: u32>(
770781
for i in 0..OutputFields {
771782
result[i] = (last_limb - result[i]) * path[i] + result[i];
772783
}
784+
773785
result
774786
}
775787

776788
mod test {
777789
use crate::_string_tools::slice_packed_field::slice_field;
778790
use crate::_string_tools::slice_packed_field::slice_fields;
779791

792+
mod get_last_limb_path {
793+
use crate::_string_tools::slice_packed_field::get_last_limb_path;
794+
795+
#[test]
796+
fn returns_expected_values() {
797+
// test cases taken from documentation on `LAST_LIMB_PATH`, note that indices are offset by 1 from
798+
// the value passed to `get_last_limb_path`.
799+
let mut test_cases: [[u1; 36]] = [[0; 36]; 36];
800+
for i in 0..36 {
801+
test_cases[i] = [0; 36];
802+
for j in 0..i {
803+
test_cases[i][j] = 1;
804+
}
805+
}
806+
for i in 0..35 {
807+
let path = get_last_limb_path::<36>(i as Field);
808+
let expected_path = test_cases[i];
809+
assert_eq(
810+
path,
811+
test_cases[i],
812+
f"Expected {path} to equal {expected_path} for index {i}",
813+
);
814+
}
815+
}
816+
}
817+
780818
unconstrained fn build_slices_for_test<let N: u32>(
781819
bytes: [u8; N],
782820
start: u32,
@@ -799,7 +837,6 @@ mod test {
799837
fn test_slice_fields_nolength() {
800838
let text: [u8; 1405] = "Charlie is genius, right. He's made from a million pieces of old bubble gum. Imagine that! In the summer of 1976, on his way home from an Alice Cooper concert, Charlie started to melt onto the pavement. It was too hot in L.A., and he melted like a pink bitch. Luckily though, there was Eric Phillips, a local crocodile who dabbled in black magic. He took pity on Charlie and scraped him off the floor with a pair of fish slicers. He poured him into an antique soup ladle, and boarded his magic carpet. Destination: Alaska! Eric Phillips decided to refreeze Charlie, but in his cold-blooded reptilian haste, he refroze him into to the shape of a Hoover. Charlie wasn't fazed though, he just zoomed about the place, sucking up Inuits. Ha ha! Oh. The Inuits didn't mind; they loved it in Charlie's pink, tight warm belly pouch, and they refused to come out. Charlie said, \"I'm cool with that,\" and set fire to a posh hammer to make it official. he downside was that the Inuits suffocated immediately. It was air-tight in there. Charlie panicked and fired the tiny Inuit bullets into Eric's crocodile peepers. The green shape was frozen. After a quick drink, Charlie stole Eric Phillips's magic carpet and left for Seattle. Charlie was racked with guilt: he'd killed 50 Inuits, noone needs that. He decided to spend the rest of his life putting small hairstyles onto boots, monkey nuts, trumpets and spanners."
801839
.as_bytes();
802-
println(f"text = {text}");
803840
let mut slices: [Field; 46 + 3] = [0; 46 + 3];
804841
for i in 0..46 {
805842
for j in 0..31 {
@@ -825,7 +862,6 @@ mod test {
825862
fn test_slice_fields() {
826863
let text: [u8; 1405] = "Charlie is genius, right. He's made from a million pieces of old bubble gum. Imagine that! In the summer of 1976, on his way home from an Alice Cooper concert, Charlie started to melt onto the pavement. It was too hot in L.A., and he melted like a pink bitch. Luckily though, there was Eric Phillips, a local crocodile who dabbled in black magic. He took pity on Charlie and scraped him off the floor with a pair of fish slicers. He poured him into an antique soup ladle, and boarded his magic carpet. Destination: Alaska! Eric Phillips decided to refreeze Charlie, but in his cold-blooded reptilian haste, he refroze him into to the shape of a Hoover. Charlie wasn't fazed though, he just zoomed about the place, sucking up Inuits. Ha ha! Oh. The Inuits didn't mind; they loved it in Charlie's pink, tight warm belly pouch, and they refused to come out. Charlie said, \"I'm cool with that,\" and set fire to a posh hammer to make it official. he downside was that the Inuits suffocated immediately. It was air-tight in there. Charlie panicked and fired the tiny Inuit bullets into Eric's crocodile peepers. The green shape was frozen. After a quick drink, Charlie stole Eric Phillips's magic carpet and left for Seattle. Charlie was racked with guilt: he'd killed 50 Inuits, noone needs that. He decided to spend the rest of his life putting small hairstyles onto boots, monkey nuts, trumpets and spanners."
827864
.as_bytes();
828-
println(f"text = {text}");
829865
let mut slices: [Field; 46 + 3] = [0; 46 + 3];
830866
for i in 0..46 {
831867
for j in 0..31 {
@@ -873,7 +909,6 @@ mod test {
873909
let input_bytes: [u8; 32] = input.to_be_bytes();
874910

875911
for i in 0..32 {
876-
println(f"i = {i}");
877912
let num_bytes = i;
878913
let (head, tail) = slice_field(input, num_bytes);
879914
let mut expected_head: Field = 0;

0 commit comments

Comments
 (0)