Skip to content

Commit ac8b968

Browse files
committed
Fixed MimeReader/ExperimentalMimeParser to handle really long mbox markers
Working on making MimeReader/ExperimentalMimeParser more robust.
1 parent b83f08e commit ac8b968

5 files changed

+170
-37
lines changed

MimeKit/AsyncMimeReader.cs

+12-9
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,8 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)
7474

7575
async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
7676
{
77-
int mboxMarkerIndex, mboxMarkerLength;
7877
bool midline = false;
7978
bool complete;
80-
int left = 0;
8179

8280
// consume data until we find a line that begins with "From "
8381
do {
@@ -98,26 +96,31 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
9896
} while (!complete);
9997

10098
var mboxMarkerOffset = GetOffset (inputIndex);
99+
var mboxMarkerLineNumber = lineNumber;
101100

102-
// FIXME: if the mbox marker is > the size of the input buffer, parsing will fail
103-
do {
104-
var available = await ReadAheadAsync (Math.Max (ReadAheadSize, left + 1), 0, cancellationToken).ConfigureAwait (false);
101+
OnMboxMarkerBegin (mboxMarkerOffset, lineNumber, cancellationToken);
105102

106-
if (available <= left) {
103+
do {
104+
if (await ReadAheadAsync (ReadAheadSize, 0, cancellationToken).ConfigureAwait (false) < 1) {
107105
// failed to find the end of the mbox marker; EOF reached
108106
state = MimeParserState.Error;
109-
inputIndex = inputEnd;
110107
return;
111108
}
112109

110+
int startIndex = inputIndex;
111+
int count;
112+
113113
unsafe {
114114
fixed (byte* inbuf = input) {
115-
complete = StepMboxMarker (inbuf, ref left, out mboxMarkerIndex, out mboxMarkerLength);
115+
complete = StepMboxMarker (inbuf, out count);
116116
}
117117
}
118+
119+
// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerReadAsync() in v5.0
120+
await OnMboxMarkerReadAsync (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken).ConfigureAwait (false);
118121
} while (!complete);
119122

120-
await OnMboxMarkerReadAsync (input, mboxMarkerIndex, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken).ConfigureAwait (false);
123+
OnMboxMarkerEnd (mboxMarkerOffset, mboxMarkerLineNumber, GetOffset (inputIndex), cancellationToken);
121124

122125
state = MimeParserState.MessageHeaders;
123126
}

MimeKit/ExperimentalMimeParser.cs

+22-5
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,22 @@ void PopEntity ()
266266

267267
#region Mbox Events
268268

269+
/// <summary>
270+
/// Called when the beginning of an Mbox marker is encountered in the stream.
271+
/// </summary>
272+
/// <remarks>
273+
/// <para>When the stream is specified to be in <see cref="MimeFormat.Mbox"/> format, this method will be called whenever the parser encounters the beginning of an Mbox marker.</para>
274+
/// <para>It is not necessary to override this method unless it is desirable to track the offsets of mbox markers within a stream or to extract the mbox marker itself.</para>
275+
/// </remarks>
276+
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
277+
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
278+
/// <param name="cancellationToken">The cancellation token.</param>
279+
protected override void OnMboxMarkerBegin (long beginOffset, int lineNumber, CancellationToken cancellationToken)
280+
{
281+
mboxMarkerOffset = beginOffset;
282+
mboxMarkerLength = 0;
283+
}
284+
269285
/// <summary>
270286
/// Called when an Mbox marker is encountered in the stream.
271287
/// </summary>
@@ -281,12 +297,13 @@ void PopEntity ()
281297
/// <param name="cancellationToken">The cancellation token.</param>
282298
protected override void OnMboxMarkerRead (byte[] buffer, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
283299
{
284-
if (mboxMarkerBuffer.Length < count)
285-
Array.Resize (ref mboxMarkerBuffer, count);
300+
int needed = mboxMarkerLength + count;
286301

287-
Buffer.BlockCopy (buffer, startIndex, mboxMarkerBuffer, 0, count);
288-
mboxMarkerOffset = beginOffset;
289-
mboxMarkerLength = count;
302+
if (mboxMarkerBuffer.Length < needed)
303+
Array.Resize (ref mboxMarkerBuffer, needed);
304+
305+
Buffer.BlockCopy (buffer, startIndex, mboxMarkerBuffer, mboxMarkerLength, count);
306+
mboxMarkerLength += count;
290307
}
291308

292309
#endregion Mbox Events

MimeKit/MimeReader.cs

+50-23
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,20 @@ public virtual void SetStream (Stream stream, MimeFormat format = MimeFormat.Def
220220

221221
#region Mbox Events
222222

223+
/// <summary>
224+
/// Called when the beginning of an Mbox marker is encountered in the stream.
225+
/// </summary>
226+
/// <remarks>
227+
/// <para>When the stream is specified to be in <see cref="MimeFormat.Mbox"/> format, this method will be called whenever the parser encounters the beginning of an Mbox marker.</para>
228+
/// <para>It is not necessary to override this method unless it is desirable to track the offsets of mbox markers within a stream or to extract the mbox marker itself.</para>
229+
/// </remarks>
230+
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
231+
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
232+
/// <param name="cancellationToken">The cancellation token.</param>
233+
protected virtual void OnMboxMarkerBegin (long beginOffset, int lineNumber, CancellationToken cancellationToken)
234+
{
235+
}
236+
223237
/// <summary>
224238
/// Called when an Mbox marker is encountered in the stream.
225239
/// </summary>
@@ -257,6 +271,21 @@ protected virtual Task OnMboxMarkerReadAsync (byte[] buffer, int startIndex, int
257271
return Task.CompletedTask;
258272
}
259273

274+
/// <summary>
275+
/// Called when the end of an Mbox marker is encountered in the stream.
276+
/// </summary>
277+
/// <remarks>
278+
/// <para>When the stream is specified to be in <see cref="MimeFormat.Mbox"/> format, this method will be called whenever the parser encounters the end of an Mbox marker.</para>
279+
/// <para>It is not necessary to override this method unless it is desirable to track the offsets of mbox markers within a stream or to extract the mbox marker itself.</para>
280+
/// </remarks>
281+
/// <param name="beginOffset">The offset into the stream where the mbox marker begins.</param>
282+
/// <param name="lineNumber">The line number where the mbox marker exists within the stream.</param>
283+
/// <param name="endOffset">The offset into the stream where the mbox marker ends.</param>
284+
/// <param name="cancellationToken">The cancellation token.</param>
285+
protected virtual void OnMboxMarkerEnd (long beginOffset, int lineNumber, long endOffset, CancellationToken cancellationToken)
286+
{
287+
}
288+
260289
#endregion Mbox Events
261290

262291
#region Header Events
@@ -1255,11 +1284,10 @@ unsafe bool StepMboxMarkerStart (byte* inbuf, ref bool midline)
12551284
return false;
12561285
}
12571286

1258-
unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex, out int mboxMarkerLength)
1287+
unsafe bool StepMboxMarker (byte* inbuf, out int count)
12591288
{
12601289
byte* inptr = inbuf + inputIndex;
12611290
byte* inend = inbuf + inputEnd;
1262-
int startIndex = inputIndex;
12631291
byte* start = inptr;
12641292

12651293
*inend = (byte) '\n';
@@ -1268,19 +1296,18 @@ unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex,
12681296
while (*inptr != (byte) '\n')
12691297
inptr++;
12701298

1299+
count = (int) (inptr - start);
1300+
1301+
// make sure not to consume the '\r' if it exists
1302+
if (inptr > start && *(inptr - 1) == (byte) '\r')
1303+
count--;
1304+
12711305
if (inptr == inend) {
1272-
// we don't have enough input data
1273-
left = (int) (inptr - start);
1274-
mboxMarkerLength = 0;
1275-
mboxMarkerIndex = 0;
1306+
// we've only consumed a partial mbox marker
1307+
inputIndex += count;
12761308
return false;
12771309
}
12781310

1279-
var markerLength = (int) (inptr - start);
1280-
1281-
if (inptr > start && *(inptr - 1) == (byte) '\r')
1282-
markerLength--;
1283-
12841311
// consume the '\n'
12851312
inptr++;
12861313

@@ -1289,18 +1316,13 @@ unsafe bool StepMboxMarker (byte* inbuf, ref int left, out int mboxMarkerIndex,
12891316
inputIndex += lineLength;
12901317
IncrementLineNumber (inputIndex);
12911318

1292-
mboxMarkerLength = markerLength;
1293-
mboxMarkerIndex = startIndex;
1294-
12951319
return true;
12961320
}
12971321

12981322
unsafe void StepMboxMarker (byte* inbuf, CancellationToken cancellationToken)
12991323
{
1300-
int mboxMarkerIndex, mboxMarkerLength;
13011324
bool midline = false;
13021325
bool complete;
1303-
int left = 0;
13041326

13051327
// consume data until we find a line that begins with "From "
13061328
do {
@@ -1317,22 +1339,27 @@ unsafe void StepMboxMarker (byte* inbuf, CancellationToken cancellationToken)
13171339
} while (!complete);
13181340

13191341
var mboxMarkerOffset = GetOffset (inputIndex);
1342+
var mboxMarkerLineNumber = lineNumber;
13201343

1321-
// FIXME: if the mbox marker is > the size of the input buffer, parsing will fail
1322-
do {
1323-
var available = ReadAhead (Math.Max (ReadAheadSize, left + 1), 0, cancellationToken);
1344+
OnMboxMarkerBegin (mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken);
13241345

1325-
if (available <= left) {
1346+
do {
1347+
if (ReadAhead (ReadAheadSize, 0, cancellationToken) < 1) {
13261348
// failed to find the end of the mbox marker; EOF reached
13271349
state = MimeParserState.Error;
1328-
inputIndex = inputEnd;
13291350
return;
13301351
}
13311352

1332-
complete = StepMboxMarker (inbuf, ref left, out mboxMarkerIndex, out mboxMarkerLength);
1353+
int startIndex = inputIndex;
1354+
int count;
1355+
1356+
complete = StepMboxMarker (inbuf, out count);
1357+
1358+
// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerRead() in v5.0
1359+
OnMboxMarkerRead (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken);
13331360
} while (!complete);
13341361

1335-
OnMboxMarkerRead (input, mboxMarkerIndex, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken);
1362+
OnMboxMarkerEnd (mboxMarkerOffset, mboxMarkerLineNumber, GetOffset (inputIndex), cancellationToken);
13361363

13371364
state = MimeParserState.MessageHeaders;
13381365
}

UnitTests/ExperimentalMimeParserTests.cs

+40
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,46 @@ public async Task TestDoubleMboxMarkerAsync ()
405405
}
406406
}
407407

408+
[Test]
409+
public void TestReallyLongMboxMarker ()
410+
{
411+
var content = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");
412+
var marker = "From " + new string ('X', 4092);
413+
414+
using (var stream = new MemoryStream ()) {
415+
var bytes = Encoding.ASCII.GetBytes (marker);
416+
stream.Write (bytes, 0, bytes.Length);
417+
stream.Write (content, 0, content.Length);
418+
stream.Position = 0;
419+
420+
var parser = new ExperimentalMimeParser (stream, MimeFormat.Mbox);
421+
var message = parser.ParseMessage ();
422+
423+
Assert.That (message.Headers.Count, Is.EqualTo (3));
424+
Assert.That (parser.MboxMarker, Is.EqualTo (marker));
425+
}
426+
}
427+
428+
[Test]
429+
public async Task TestReallyLongMboxMarkerAsync ()
430+
{
431+
var content = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");
432+
var marker = "From " + new string ('X', 4092);
433+
434+
using (var stream = new MemoryStream ()) {
435+
var bytes = Encoding.ASCII.GetBytes (marker);
436+
stream.Write (bytes, 0, bytes.Length);
437+
stream.Write (content, 0, content.Length);
438+
stream.Position = 0;
439+
440+
var parser = new ExperimentalMimeParser (stream, MimeFormat.Mbox);
441+
var message = await parser.ParseMessageAsync ();
442+
443+
Assert.That (message.Headers.Count, Is.EqualTo (3));
444+
Assert.That (parser.MboxMarker, Is.EqualTo (marker));
445+
}
446+
}
447+
408448
[Test]
409449
public void TestEmptyMessage ()
410450
{

UnitTests/MimeParserTests.cs

+46
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,52 @@ public async Task TestDoubleMboxMarkerAsync ()
389389
}
390390
}
391391

392+
[Test]
393+
public void TestReallyLongMboxMarker ()
394+
{
395+
var content = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");
396+
var marker = "From " + new string ('X', 4092);
397+
398+
using (var stream = new MemoryStream ()) {
399+
var bytes = Encoding.ASCII.GetBytes (marker);
400+
stream.Write (bytes, 0, bytes.Length);
401+
stream.Write (content, 0, content.Length);
402+
stream.Position = 0;
403+
404+
// FIXME: Fix MimeParser to handle this as well as ExperimentalMimeParser?
405+
var parser = new MimeParser (stream, MimeFormat.Mbox);
406+
//var message = parser.ParseMessage ();
407+
408+
//Assert.That (message.Headers.Count, Is.EqualTo (3));
409+
//Assert.That (parser.MboxMarker, Is.EqualTo (marker));
410+
411+
Assert.Throws<FormatException> (() => parser.ParseMessage ());
412+
}
413+
}
414+
415+
[Test]
416+
public void TestReallyLongMboxMarkerAsync ()
417+
{
418+
var content = Encoding.ASCII.GetBytes ("\r\nFrom: [email protected]\r\nTo: [email protected]\r\nSubject: test message\r\n\r\nBody text\r\n");
419+
var marker = "From " + new string ('X', 4092);
420+
421+
using (var stream = new MemoryStream ()) {
422+
var bytes = Encoding.ASCII.GetBytes (marker);
423+
stream.Write (bytes, 0, bytes.Length);
424+
stream.Write (content, 0, content.Length);
425+
stream.Position = 0;
426+
427+
// FIXME: Fix MimeParser to handle this as well as ExperimentalMimeParser?
428+
var parser = new MimeParser (stream, MimeFormat.Mbox);
429+
//var message = await parser.ParseMessageAsync ();
430+
431+
//Assert.That (message.Headers.Count, Is.EqualTo (3));
432+
//Assert.That (parser.MboxMarker, Is.EqualTo (marker));
433+
434+
Assert.ThrowsAsync<FormatException> (async () => await parser.ParseMessageAsync ());
435+
}
436+
}
437+
392438
[Test]
393439
public void TestEmptyMessage ()
394440
{

0 commit comments

Comments
 (0)