Skip to content

Commit 9ea73d1

Browse files
committed
Optimize Utf8Validator with constant input Vector.slice API
1 parent ed13810 commit 9ea73d1

File tree

1 file changed

+28
-55
lines changed

1 file changed

+28
-55
lines changed

src/main/java/org/simdjson/Utf8Validator.java

Lines changed: 28 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import static jdk.incubator.vector.VectorOperators.LSHL;
1212
import static jdk.incubator.vector.VectorOperators.LSHR;
1313
import static jdk.incubator.vector.VectorOperators.NE;
14-
import static jdk.incubator.vector.VectorOperators.UNSIGNED_GE;
15-
import static jdk.incubator.vector.VectorOperators.UNSIGNED_GT;
14+
import static jdk.incubator.vector.VectorOperators.UGE;
15+
import static jdk.incubator.vector.VectorOperators.UGT;
1616
import static jdk.incubator.vector.VectorShuffle.iota;
1717
import static org.simdjson.VectorUtils.BYTE_SPECIES;
1818
import static org.simdjson.VectorUtils.INT_SPECIES;
@@ -40,8 +40,6 @@ class Utf8Validator {
4040
private static final byte TWO_CONTINUATIONS = (byte) (1 << 7);
4141
private static final byte MAX_2_LEADING_BYTE = (byte) 0b110_11111;
4242
private static final byte MAX_3_LEADING_BYTE = (byte) 0b1110_1111;
43-
private static final int TWO_BYTES_SIZE = Byte.SIZE * 2;
44-
private static final int THREE_BYTES_SIZE = Byte.SIZE * 3;
4543
private static final ByteVector BYTE_1_HIGH_LOOKUP = createByte1HighLookup();
4644
private static final ByteVector BYTE_1_LOW_LOOKUP = createByte1LowLookup();
4745
private static final ByteVector BYTE_2_HIGH_LOOKUP = createByte2HighLookup();
@@ -52,84 +50,63 @@ class Utf8Validator {
5250
private static final int STEP_SIZE = BYTE_SPECIES.vectorByteSize();
5351

5452
static void validate(byte[] buffer, int length) {
55-
long previousIncomplete = 0;
53+
int offset = 0;
5654
long errors = 0;
57-
int previousFourUtf8Bytes = 0;
58-
55+
long previousIncomplete = 0;
5956
int loopBound = BYTE_SPECIES.loopBound(length);
60-
int offset = 0;
57+
ByteVector previousChunk = ByteVector.broadcast(BYTE_SPECIES, 0);
58+
6159
for (; offset < loopBound; offset += STEP_SIZE) {
6260
ByteVector chunk = ByteVector.fromArray(BYTE_SPECIES, buffer, offset);
6361
IntVector chunkAsInts = chunk.reinterpretAsInts();
6462
// ASCII fast path can bypass the checks that are only required for multibyte code points.
6563
if (chunk.and(ALL_ASCII_MASK).compare(EQ, 0).allTrue()) {
6664
errors |= previousIncomplete;
6765
} else {
68-
previousIncomplete = chunk.compare(UNSIGNED_GE, INCOMPLETE_CHECK).toLong();
69-
// Shift the input forward by four bytes to make space for the previous four bytes.
70-
// The previous three bytes are required for validation, pulling in the last integer
71-
// will give the previous four bytes. The switch to integer vectors is to allow for
72-
// integer shifting instead of the more expensive shuffle / slice operations.
73-
IntVector chunkWithPreviousFourBytes = chunkAsInts
74-
.rearrange(FOUR_BYTES_FORWARD_SHIFT)
75-
.withLane(0, previousFourUtf8Bytes);
76-
// Shift the current input forward by one byte to include one byte from the previous chunk.
77-
ByteVector previousOneByte = chunkAsInts
78-
.lanewise(LSHL, Byte.SIZE)
79-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, THREE_BYTES_SIZE))
80-
.reinterpretAsBytes();
66+
previousIncomplete = chunk.compare(UGE, INCOMPLETE_CHECK).toLong();
67+
// Pull in last byte from previous chunk.
68+
ByteVector previousOneByte = previousChunk.slice(BYTE_SPECIES.length() - 1, chunk);
69+
8170
ByteVector byte2HighNibbles = chunkAsInts.lanewise(LSHR, 4)
8271
.reinterpretAsBytes()
8372
.and(LOW_NIBBLE_MASK);
8473
ByteVector byte1HighNibbles = previousOneByte.reinterpretAsInts()
8574
.lanewise(LSHR, 4)
8675
.reinterpretAsBytes()
8776
.and(LOW_NIBBLE_MASK);
77+
8878
ByteVector byte1LowNibbles = previousOneByte.and(LOW_NIBBLE_MASK);
8979
ByteVector byte1HighState = byte1HighNibbles.selectFrom(BYTE_1_HIGH_LOOKUP);
9080
ByteVector byte1LowState = byte1LowNibbles.selectFrom(BYTE_1_LOW_LOOKUP);
9181
ByteVector byte2HighState = byte2HighNibbles.selectFrom(BYTE_2_HIGH_LOOKUP);
9282
ByteVector firstCheck = byte1HighState.and(byte1LowState).and(byte2HighState);
83+
9384
// All remaining checks are for invalid 3 and 4-byte sequences, which either have too many
9485
// continuation bytes or not enough.
95-
ByteVector previousTwoBytes = chunkAsInts
96-
.lanewise(LSHL, TWO_BYTES_SIZE)
97-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, TWO_BYTES_SIZE))
98-
.reinterpretAsBytes();
86+
ByteVector previousTwoBytes = previousChunk.slice(BYTE_SPECIES.length() - 2, chunk);
87+
9988
// The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences.
100-
VectorMask<Byte> is3ByteLead = previousTwoBytes.compare(UNSIGNED_GT, MAX_2_LEADING_BYTE);
101-
ByteVector previousThreeBytes = chunkAsInts
102-
.lanewise(LSHL, THREE_BYTES_SIZE)
103-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, Byte.SIZE))
104-
.reinterpretAsBytes();
89+
VectorMask<Byte> is3ByteLead = previousTwoBytes.compare(UGT, MAX_2_LEADING_BYTE);
90+
ByteVector previousThreeBytes = previousChunk.slice(BYTE_SPECIES.length() - 3, chunk);
91+
10592
// The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences.
106-
VectorMask<Byte> is4ByteLead = previousThreeBytes.compare(UNSIGNED_GT, MAX_3_LEADING_BYTE);
93+
VectorMask<Byte> is4ByteLead = previousThreeBytes.compare(UGT, MAX_3_LEADING_BYTE);
10794
// The firstCheck vector contains 0x80 values on continuation byte indexes.
10895
// The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out.
10996
ByteVector secondCheck = firstCheck.add((byte) 0x80, is3ByteLead.or(is4ByteLead));
11097
errors |= secondCheck.compare(NE, 0).toLong();
11198
}
112-
previousFourUtf8Bytes = chunkAsInts.lane(INT_SPECIES.length() - 1);
99+
previousChunk = chunk;
113100
}
114101

115102
// If the input length is not a multiple of the vector width, load the tail through a mask so the missing bytes are padded with zeros.
116103
VectorMask<Byte> remainingBytes = BYTE_SPECIES.indexInRange(offset, length);
117104
ByteVector chunk = ByteVector.fromArray(BYTE_SPECIES, buffer, offset, remainingBytes);
118105
if (!chunk.and(ALL_ASCII_MASK).compare(EQ, 0).allTrue()) {
119106
IntVector chunkAsInts = chunk.reinterpretAsInts();
120-
previousIncomplete = chunk.compare(UNSIGNED_GE, INCOMPLETE_CHECK).toLong();
121-
// Shift the input forward by four bytes to make space for the previous four bytes.
122-
// The previous three bytes are required for validation, pulling in the last integer
123-
// will give the previous four bytes. The switch to integer vectors is to allow for
124-
// integer shifting instead of the more expensive shuffle / slice operations.
125-
IntVector chunkWithPreviousFourBytes = chunkAsInts
126-
.rearrange(FOUR_BYTES_FORWARD_SHIFT)
127-
.withLane(0, previousFourUtf8Bytes);
128-
// Shift the current input forward by one byte to include one byte from the previous chunk.
129-
ByteVector previousOneByte = chunkAsInts
130-
.lanewise(LSHL, Byte.SIZE)
131-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, THREE_BYTES_SIZE))
132-
.reinterpretAsBytes();
107+
previousIncomplete = chunk.compare(UGE, INCOMPLETE_CHECK).toLong();
108+
// Pull in last byte from previous chunk.
109+
ByteVector previousOneByte = previousChunk.slice(BYTE_SPECIES.length() - 1, chunk);
133110
ByteVector byte2HighNibbles = chunkAsInts.lanewise(LSHR, 4)
134111
.reinterpretAsBytes()
135112
.and(LOW_NIBBLE_MASK);
@@ -144,18 +121,14 @@ static void validate(byte[] buffer, int length) {
144121
ByteVector firstCheck = byte1HighState.and(byte1LowState).and(byte2HighState);
145122
// All remaining checks are for invalid 3 and 4-byte sequences, which either have too many
146123
// continuation bytes or not enough.
147-
ByteVector previousTwoBytes = chunkAsInts
148-
.lanewise(LSHL, TWO_BYTES_SIZE)
149-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, TWO_BYTES_SIZE))
150-
.reinterpretAsBytes();
124+
// Pull in last two bytes from previous chunk.
125+
ByteVector previousTwoBytes = previousChunk.slice(BYTE_SPECIES.length() - 2, chunk);
151126
// The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences.
152-
VectorMask<Byte> is3ByteLead = previousTwoBytes.compare(UNSIGNED_GT, MAX_2_LEADING_BYTE);
153-
ByteVector previousThreeBytes = chunkAsInts
154-
.lanewise(LSHL, THREE_BYTES_SIZE)
155-
.or(chunkWithPreviousFourBytes.lanewise(LSHR, Byte.SIZE))
156-
.reinterpretAsBytes();
127+
VectorMask<Byte> is3ByteLead = previousTwoBytes.compare(UGT, MAX_2_LEADING_BYTE);
128+
ByteVector previousThreeBytes = previousChunk.slice(BYTE_SPECIES.length() - 3, chunk);
157129
// The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences.
158-
VectorMask<Byte> is4ByteLead = previousThreeBytes.compare(UNSIGNED_GT, MAX_3_LEADING_BYTE);
130+
// previousThreeBytes (computed above) already carries the last three bytes of the previous chunk in its leading lanes.
131+
VectorMask<Byte> is4ByteLead = previousThreeBytes.compare(UGT, MAX_3_LEADING_BYTE);
159132
// The firstCheck vector contains 0x80 values on continuation byte indexes.
160133
// The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out.
161134
ByteVector secondCheck = firstCheck.add((byte) 0x80, is3ByteLead.or(is4ByteLead));

0 commit comments

Comments
 (0)