Skip to content

Commit ee1a14c

Browse files
committed
Update MimeReader to use Span<T> for built-in intrinsics
We've reached the point where .NET Core's built-in SIMD optimizations in Span<T>.IndexOf() are as good as or better than my current optimization for scanning until a '\n' in ScanContent(). This also allows me to use Span<T>.IndexOf() in all of the other locations that were using `while (*inptr != '\n') inptr++`, so we get improved performance in those locations for free. In some ways, it's sad to see the end of an era - but it's also exciting, because MimeReader is now faster than ever!
1 parent 167bd10 commit ee1a14c

File tree

2 files changed

+241
-404
lines changed

2 files changed

+241
-404
lines changed

MimeKit/AsyncMimeReader.cs

+23-89
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,7 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)
6363
return false;
6464
}
6565

66-
unsafe {
67-
fixed (byte* inbuf = input) {
68-
complete = StepByteOrderMark (inbuf, ref bomIndex);
69-
}
70-
}
66+
complete = StepByteOrderMark (ref bomIndex);
7167
} while (!complete && inputIndex == inputEnd);
7268

7369
return complete;
@@ -89,11 +85,7 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
8985
return;
9086
}
9187

92-
unsafe {
93-
fixed (byte* inbuf = input) {
94-
complete = StepMboxMarkerStart (inbuf, ref midline);
95-
}
96-
}
88+
complete = StepMboxMarkerStart (ref midline);
9789
} while (!complete);
9890

9991
var mboxMarkerOffset = GetOffset (inputIndex);
@@ -109,13 +101,8 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
109101
}
110102

111103
int startIndex = inputIndex;
112-
int count;
113104

114-
unsafe {
115-
fixed (byte* inbuf = input) {
116-
complete = StepMboxMarker (inbuf, out count);
117-
}
118-
}
105+
complete = StepMboxMarker (out int count);
119106

120107
// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerReadAsync() in v5.0
121108
await OnMboxMarkerReadAsync (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken).ConfigureAwait (false);
@@ -177,41 +164,27 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
177164
}
178165

179166
// Scan ahead a bit to see if this looks like an invalid header.
180-
do {
181-
unsafe {
182-
fixed (byte* inbuf = input) {
183-
if (TryDetectInvalidHeader (inbuf, out invalid, out fieldNameLength, out headerFieldLength))
184-
break;
185-
}
186-
}
187-
167+
while (!TryDetectInvalidHeader (out invalid, out fieldNameLength, out headerFieldLength)) {
188168
int atleast = (inputEnd - inputIndex) + 1;
189169

190170
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast) {
191171
// Not enough input to even find the ':'... mark as invalid and continue?
192172
invalid = true;
193173
break;
194174
}
195-
} while (true);
175+
}
196176

197177
if (invalid) {
198178
// Figure out why this is an invalid header.
199179

200180
if (input[inputIndex] == (byte) '-') {
201181
// Check for a boundary marker. If the message is properly formatted, this will NEVER happen.
202-
do {
203-
unsafe {
204-
fixed (byte* inbuf = input) {
205-
if (TryCheckBoundaryWithinHeaderBlock (inbuf))
206-
break;
207-
}
208-
}
209-
182+
while (!TryCheckBoundaryWithinHeaderBlock ()) {
210183
int atleast = (inputEnd - inputIndex) + 1;
211184

212185
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
213186
break;
214-
} while (true);
187+
}
215188

216189
// Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary.
217190
if (state == MimeParserState.Boundary)
@@ -220,19 +193,12 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
220193
// Fall through and act as if we're consuming a header.
221194
} else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') {
222195
// Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen.
223-
do {
224-
unsafe {
225-
fixed (byte* inbuf = input) {
226-
if (TryCheckMboxMarkerWithinHeaderBlock (inbuf))
227-
break;
228-
}
229-
}
230-
196+
while (!TryCheckMboxMarkerWithinHeaderBlock ()) {
231197
int atleast = (inputEnd - inputIndex) + 1;
232198

233199
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
234200
break;
235-
} while (true);
201+
}
236202

237203
// state will be one of the following values:
238204
// 1. Complete: This means that we've found an actual mbox marker
@@ -260,20 +226,13 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
260226
bool midline = true;
261227

262228
// Consume the header value.
263-
do {
264-
unsafe {
265-
fixed (byte* inbuf = input) {
266-
if (StepHeaderValue (inbuf, ref midline))
267-
break;
268-
}
269-
}
270-
229+
while (!StepHeaderValue (ref midline)) {
271230
if (await ReadAheadAsync (1, 0, cancellationToken).ConfigureAwait (false) == 0) {
272231
state = MimeParserState.Content;
273232
eof = true;
274233
break;
275234
}
276-
} while (true);
235+
}
277236

278237
if (toplevel && headerCount == 0 && invalid && !IsMboxMarker (headerBuffer)) {
279238
state = MimeParserState.Error;
@@ -293,12 +252,8 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
293252
async Task<bool> SkipLineAsync (bool consumeNewLine, CancellationToken cancellationToken)
294253
{
295254
do {
296-
unsafe {
297-
fixed (byte* inbuf = input) {
298-
if (InnerSkipLine (inbuf, consumeNewLine))
299-
return true;
300-
}
301-
}
255+
if (InnerSkipLine (consumeNewLine))
256+
return true;
302257

303258
if (await ReadAheadAsync (ReadAheadSize, 1, cancellationToken).ConfigureAwait (false) <= 0)
304259
return false;
@@ -346,11 +301,7 @@ async Task<ScanContentResult> ScanContentAsync (ScanContentType type, long begin
346301

347302
int contentIndex = inputIndex;
348303

349-
unsafe {
350-
fixed (byte* inbuf = input) {
351-
ScanContent (inbuf, ref nleft, ref midline, ref formats);
352-
}
353-
}
304+
ScanContent (ref nleft, ref midline, ref formats);
354305

355306
if (contentIndex < inputIndex) {
356307
switch (type) {
@@ -413,24 +364,11 @@ async Task<int> ConstructMessagePartAsync (int depth, CancellationToken cancella
413364
return 0;
414365
}
415366

416-
unsafe {
417-
fixed (byte* inbuf = input) {
418-
byte* start = inbuf + inputIndex;
419-
byte* inend = inbuf + inputEnd;
420-
byte* inptr = start;
421-
422-
*inend = (byte) '\n';
423-
424-
while (*inptr != (byte) '\n')
425-
inptr++;
426-
427-
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
428-
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
429-
// *never* be an Mbox "From " marker in Entity mode.
430-
if ((boundary = CheckBoundary (inputIndex, start, (int) (inptr - start))) != BoundaryType.None)
431-
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
432-
}
433-
}
367+
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
368+
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
369+
// *never* be an Mbox "From " marker in Entity mode.
370+
if ((boundary = CheckBoundary ()) != BoundaryType.None)
371+
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
434372
}
435373

436374
// Note: When parsing non-toplevel parts, the header parser will never result in the Error state.
@@ -626,14 +564,10 @@ async Task<int> ConstructMultipartAsync (ContentType contentType, int depth, Can
626564
// We either found the end of the stream or we found a parent's boundary
627565
PopBoundary ();
628566

629-
unsafe {
630-
fixed (byte* inbuf = input) {
631-
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
632-
boundary = BoundaryType.ImmediateEndBoundary;
633-
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
634-
boundary = BoundaryType.ImmediateBoundary;
635-
}
636-
}
567+
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (true))
568+
boundary = BoundaryType.ImmediateEndBoundary;
569+
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (false))
570+
boundary = BoundaryType.ImmediateBoundary;
637571

638572
endOffset = GetEndOffset (inputIndex);
639573

0 commit comments

Comments
 (0)