1111import static jdk .incubator .vector .VectorOperators .LSHL ;
1212import static jdk .incubator .vector .VectorOperators .LSHR ;
1313import static jdk .incubator .vector .VectorOperators .NE ;
14- import static jdk .incubator .vector .VectorOperators .UNSIGNED_GE ;
15- import static jdk .incubator .vector .VectorOperators .UNSIGNED_GT ;
14+ import static jdk .incubator .vector .VectorOperators .UGE ;
15+ import static jdk .incubator .vector .VectorOperators .UGT ;
1616import static jdk .incubator .vector .VectorShuffle .iota ;
1717import static org .simdjson .VectorUtils .BYTE_SPECIES ;
1818import static org .simdjson .VectorUtils .INT_SPECIES ;
@@ -40,8 +40,6 @@ class Utf8Validator {
4040 private static final byte TWO_CONTINUATIONS = (byte ) (1 << 7 );
4141 private static final byte MAX_2_LEADING_BYTE = (byte ) 0b110_11111;
4242 private static final byte MAX_3_LEADING_BYTE = (byte ) 0b1110_1111;
43- private static final int TWO_BYTES_SIZE = Byte .SIZE * 2 ;
44- private static final int THREE_BYTES_SIZE = Byte .SIZE * 3 ;
4543 private static final ByteVector BYTE_1_HIGH_LOOKUP = createByte1HighLookup ();
4644 private static final ByteVector BYTE_1_LOW_LOOKUP = createByte1LowLookup ();
4745 private static final ByteVector BYTE_2_HIGH_LOOKUP = createByte2HighLookup ();
@@ -52,84 +50,63 @@ class Utf8Validator {
5250 private static final int STEP_SIZE = BYTE_SPECIES .vectorByteSize ();
5351
5452 static void validate (byte [] buffer , int length ) {
55- long previousIncomplete = 0 ;
53+ int offset = 0 ;
5654 long errors = 0 ;
57- int previousFourUtf8Bytes = 0 ;
58-
55+ long previousIncomplete = 0 ;
5956 int loopBound = BYTE_SPECIES .loopBound (length );
60- int offset = 0 ;
57+ ByteVector previousChunk = ByteVector .broadcast (BYTE_SPECIES , 0 );
58+
6159 for (; offset < loopBound ; offset += STEP_SIZE ) {
6260 ByteVector chunk = ByteVector .fromArray (BYTE_SPECIES , buffer , offset );
6361 IntVector chunkAsInts = chunk .reinterpretAsInts ();
6462 // ASCII fast path can bypass the checks that are only required for multibyte code points.
6563 if (chunk .and (ALL_ASCII_MASK ).compare (EQ , 0 ).allTrue ()) {
6664 errors |= previousIncomplete ;
6765 } else {
68- previousIncomplete = chunk .compare (UNSIGNED_GE , INCOMPLETE_CHECK ).toLong ();
69- // Shift the input forward by four bytes to make space for the previous four bytes.
70- // The previous three bytes are required for validation, pulling in the last integer
71- // will give the previous four bytes. The switch to integer vectors is to allow for
72- // integer shifting instead of the more expensive shuffle / slice operations.
73- IntVector chunkWithPreviousFourBytes = chunkAsInts
74- .rearrange (FOUR_BYTES_FORWARD_SHIFT )
75- .withLane (0 , previousFourUtf8Bytes );
76- // Shift the current input forward by one byte to include one byte from the previous chunk.
77- ByteVector previousOneByte = chunkAsInts
78- .lanewise (LSHL , Byte .SIZE )
79- .or (chunkWithPreviousFourBytes .lanewise (LSHR , THREE_BYTES_SIZE ))
80- .reinterpretAsBytes ();
66+ previousIncomplete = chunk .compare (UGE , INCOMPLETE_CHECK ).toLong ();
67+ // Pull in last byte from previous chunk.
68+ ByteVector previousOneByte = previousChunk .slice (BYTE_SPECIES .length () - 1 , chunk );
69+
8170 ByteVector byte2HighNibbles = chunkAsInts .lanewise (LSHR , 4 )
8271 .reinterpretAsBytes ()
8372 .and (LOW_NIBBLE_MASK );
8473 ByteVector byte1HighNibbles = previousOneByte .reinterpretAsInts ()
8574 .lanewise (LSHR , 4 )
8675 .reinterpretAsBytes ()
8776 .and (LOW_NIBBLE_MASK );
77+
8878 ByteVector byte1LowNibbles = previousOneByte .and (LOW_NIBBLE_MASK );
8979 ByteVector byte1HighState = byte1HighNibbles .selectFrom (BYTE_1_HIGH_LOOKUP );
9080 ByteVector byte1LowState = byte1LowNibbles .selectFrom (BYTE_1_LOW_LOOKUP );
9181 ByteVector byte2HighState = byte2HighNibbles .selectFrom (BYTE_2_HIGH_LOOKUP );
9282 ByteVector firstCheck = byte1HighState .and (byte1LowState ).and (byte2HighState );
83+
9384 // All remaining checks are for invalid 3 and 4-byte sequences, which either have too many
9485 // continuation bytes or not enough.
95- ByteVector previousTwoBytes = chunkAsInts
96- .lanewise (LSHL , TWO_BYTES_SIZE )
97- .or (chunkWithPreviousFourBytes .lanewise (LSHR , TWO_BYTES_SIZE ))
98- .reinterpretAsBytes ();
86+ ByteVector previousTwoBytes = previousChunk .slice (BYTE_SPECIES .length () - 2 , chunk );
87+
9988 // The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences.
100- VectorMask <Byte > is3ByteLead = previousTwoBytes .compare (UNSIGNED_GT , MAX_2_LEADING_BYTE );
101- ByteVector previousThreeBytes = chunkAsInts
102- .lanewise (LSHL , THREE_BYTES_SIZE )
103- .or (chunkWithPreviousFourBytes .lanewise (LSHR , Byte .SIZE ))
104- .reinterpretAsBytes ();
89+ VectorMask <Byte > is3ByteLead = previousTwoBytes .compare (UGT , MAX_2_LEADING_BYTE );
90+ ByteVector previousThreeBytes = previousChunk .slice (BYTE_SPECIES .length () - 3 , chunk );
91+
10592 // The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences.
106- VectorMask <Byte > is4ByteLead = previousThreeBytes .compare (UNSIGNED_GT , MAX_3_LEADING_BYTE );
93+ VectorMask <Byte > is4ByteLead = previousThreeBytes .compare (UGT , MAX_3_LEADING_BYTE );
10794 // The firstCheck vector contains 0x80 values on continuation byte indexes.
10895 // The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out.
10996 ByteVector secondCheck = firstCheck .add ((byte ) 0x80 , is3ByteLead .or (is4ByteLead ));
11097 errors |= secondCheck .compare (NE , 0 ).toLong ();
11198 }
112- previousFourUtf8Bytes = chunkAsInts . lane ( INT_SPECIES . length () - 1 ) ;
99+ previousChunk = chunk ;
113100 }
114101
115102 // If the input file doesn't align with the vector width, pad the missing bytes with zeros.
116103 VectorMask <Byte > remainingBytes = BYTE_SPECIES .indexInRange (offset , length );
117104 ByteVector chunk = ByteVector .fromArray (BYTE_SPECIES , buffer , offset , remainingBytes );
118105 if (!chunk .and (ALL_ASCII_MASK ).compare (EQ , 0 ).allTrue ()) {
119106 IntVector chunkAsInts = chunk .reinterpretAsInts ();
120- previousIncomplete = chunk .compare (UNSIGNED_GE , INCOMPLETE_CHECK ).toLong ();
121- // Shift the input forward by four bytes to make space for the previous four bytes.
122- // The previous three bytes are required for validation, pulling in the last integer
123- // will give the previous four bytes. The switch to integer vectors is to allow for
124- // integer shifting instead of the more expensive shuffle / slice operations.
125- IntVector chunkWithPreviousFourBytes = chunkAsInts
126- .rearrange (FOUR_BYTES_FORWARD_SHIFT )
127- .withLane (0 , previousFourUtf8Bytes );
128- // Shift the current input forward by one byte to include one byte from the previous chunk.
129- ByteVector previousOneByte = chunkAsInts
130- .lanewise (LSHL , Byte .SIZE )
131- .or (chunkWithPreviousFourBytes .lanewise (LSHR , THREE_BYTES_SIZE ))
132- .reinterpretAsBytes ();
107+ previousIncomplete = chunk .compare (UGE , INCOMPLETE_CHECK ).toLong ();
108+ // Pull in last byte from previous chunk.
109+ ByteVector previousOneByte = previousChunk .slice (BYTE_SPECIES .length () - 1 , chunk );
133110 ByteVector byte2HighNibbles = chunkAsInts .lanewise (LSHR , 4 )
134111 .reinterpretAsBytes ()
135112 .and (LOW_NIBBLE_MASK );
@@ -144,18 +121,14 @@ static void validate(byte[] buffer, int length) {
144121 ByteVector firstCheck = byte1HighState .and (byte1LowState ).and (byte2HighState );
145122 // All remaining checks are for invalid 3 and 4-byte sequences, which either have too many
146123 // continuation bytes or not enough.
147- ByteVector previousTwoBytes = chunkAsInts
148- .lanewise (LSHL , TWO_BYTES_SIZE )
149- .or (chunkWithPreviousFourBytes .lanewise (LSHR , TWO_BYTES_SIZE ))
150- .reinterpretAsBytes ();
124+ // Pull in last two bytes from previous chunk.
125+ ByteVector previousTwoBytes = previousChunk .slice (BYTE_SPECIES .length () - 2 , chunk );
151126 // The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences.
152- VectorMask <Byte > is3ByteLead = previousTwoBytes .compare (UNSIGNED_GT , MAX_2_LEADING_BYTE );
153- ByteVector previousThreeBytes = chunkAsInts
154- .lanewise (LSHL , THREE_BYTES_SIZE )
155- .or (chunkWithPreviousFourBytes .lanewise (LSHR , Byte .SIZE ))
156- .reinterpretAsBytes ();
127+ VectorMask <Byte > is3ByteLead = previousTwoBytes .compare (UGT , MAX_2_LEADING_BYTE );
128+ ByteVector previousThreeBytes = previousChunk .slice (BYTE_SPECIES .length () - 3 , chunk );
157129 // The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences.
158- VectorMask <Byte > is4ByteLead = previousThreeBytes .compare (UNSIGNED_GT , MAX_3_LEADING_BYTE );
130+ // Pull in last three bytes from previous chunk.
131+ VectorMask <Byte > is4ByteLead = previousThreeBytes .compare (UGT , MAX_3_LEADING_BYTE );
159132 // The firstCheck vector contains 0x80 values on continuation byte indexes.
160133 // The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out.
161134 ByteVector secondCheck = firstCheck .add ((byte ) 0x80 , is3ByteLead .or (is4ByteLead ));
0 commit comments