diff --git a/crates/m3/src/gadgets/hash/keccak/lookedup.rs b/crates/m3/src/gadgets/hash/keccak/lookedup.rs index 221436001..211809fde 100644 --- a/crates/m3/src/gadgets/hash/keccak/lookedup.rs +++ b/crates/m3/src/gadgets/hash/keccak/lookedup.rs @@ -2,6 +2,10 @@ //! The version of the Keccakf permutation taking the arithmetization approach that is based on //! stacked columns and lookups. +//! +//! **Note**: This implementation serves primarily as a proxy to measure lookup performance +//! characteristics. For production use, the stacked version (see [`super::stacked`]) is more +//! performant. // This implementation tries to be as close to the // [Keccak Specification Summary][keccak_spec_summary] and as such it is highly recommended to @@ -44,6 +48,11 @@ const STATE_OUT_TRACK: usize = 7; /// Keccak-f\[1600\] permutation function verification gadget. /// +/// **Performance Note**: This implementation uses lookup tables for the AND operation in the Chi +/// step of the permutation and serves primarily as a proxy to measure lookup performance +/// characteristics. For production use, consider the stacked version ([`super::stacked::Keccakf`]) +/// which is more performant as it uses direct arithmetic constraints instead of lookups. +/// /// This gadget consists of 3x horizontally combined batches of 8x rounds each, 24 rounds in total. /// You can think about it as 8x wide SIMD performing one permutation per a table row. Below is /// the graphical representation of the layout. @@ -470,20 +479,36 @@ impl LookedupRoundBatch { for xy in 0..25 { let x = xy % 5; let y = xy / 5; + // Get access to the merged column that will store the combined values used for lookup let mut merged: std::cell::RefMut<'_, [B32]> = index.get_scalars_mut(self.merged[(x, y)])?; + // Get the first input operand (B[x+1,y]) for the AND operation let b1: std::cell::Ref<'_, [B8]> = index.get_as(self.b[(x + 1, y)])?; + // Get the second input operand (B[x+2,y]) for the AND operation let b2: std::cell::Ref<'_, [B8]> = index.get_as(self.b[(x + 2, y)])?; + + // Get the result storage for the AND operation between inputs let mut b1_and_b2: std::cell::RefMut<'_, [B8]> = index.get_mut_as(self.b1_and_b2[(x, y)])?; + for i in 0..b2.len() { - // B[x,y] xor ((not B[x+1,y]) and B[x+2,y]) + // In the Chi step, we compute: B[x,y] xor ((not B[x+1,y]) and B[x+2,y]) + // Here we're computing just the AND part: B[x+1,y] & B[x+2,y] + // Note: This implementation actually computes B[x+1,y] & B[x+2,y], and the NOT + // operation is applied elsewhere in the circuit let in_a = b1[i].val(); let in_b = b2[i].val(); let output = in_a & in_b; + + // Store the AND result for later use in the final calculation b1_and_b2[i] = output.into(); + + // Prepare the lookup value by merging both inputs and the output into a single + // value This creates a 32-bit entry that will be used to perform a lookup in + // the bitwise AND table The lookup table contains all possible input-output + // combinations for the AND operation merged[i] = merge_bitand_vals(in_a, in_b, output).into(); } }