Skip to content

Commit c343123

Browse files
Merge pull request pmengal#24 from reinaldocoelho/master
Pull request fixing a parser bug for 8bit/UTF-8/flowed emails
2 parents 556ab29 + 1a40492 commit c343123

5 files changed

Lines changed: 640 additions & 23 deletions

File tree

Class Library/ActiveUp.Net.Common/Parser.cs

Lines changed: 32 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -117,48 +117,56 @@ private static ContentType GetContentType(string input)
117117
/// <returns></returns>
118118
private static ContentDisposition GetContentDisposition(string input)
119119
{
120-
ContentDisposition field = new ContentDisposition();
121-
//TODO: include TAB detection in Regex
122-
field.Disposition = Regex.Match(input.Replace("\t", ""), @"(?<=: ?)\S+?(?=([;\s]|\Z))").Value;
120+
var field = new ContentDisposition
121+
{
122+
//TODO: include TAB detection in Regex
123+
Disposition = Regex.Match(input.Replace("\t", ""), @"(?<=: ?)\S+?(?=([;\s]|\Z))").Value
124+
};
123125
//TODO: include TAB detection in Regex
124126
Match parammatch = Regex.Match(input.Replace("\t", ""), @"(?<=;[ \t]?)[^;]*=[^;]*(?=(;|\Z))");
125127
for (; parammatch.Success; parammatch = parammatch.NextMatch()) field.Parameters.Add(FormatFieldName(parammatch.Value.Substring(0, parammatch.Value.IndexOf('='))), parammatch.Value.Substring(parammatch.Value.IndexOf('=') + 1).Replace("\"", "").Trim('\r', '\n'));
126128
return field;
127129
}
128130

131+
[Obsolete("We are migrating ASCII default char structure to UTF8, please, use GetUtf8ByteCountOfPart() as a default.")]
129132
private static int GetASCIIByteCountOfPart(string part)
130133
{
131134
return Encoding.ASCII.GetByteCount(part);
132135
}
133136

137+
private static int GetUtf8ByteCountOfPart(string part)
138+
{
139+
return Encoding.UTF8.GetByteCount(part);
140+
}
141+
134142
/// <summary>
135143
/// Parses the sub parts.
136144
/// </summary>
137145
/// <param name="part">The part.</param>
138146
private static void ParseSubParts(ref MimePart part, Message message)
139147
{
140148
string boundary = part.ContentType.Parameters["boundary"];
141-
string parentPartAsciiBody = ToUtf8(part.BinaryContent);
149+
string parentPartUtf8Body = ToUtf8(part.BinaryContent);
142150
byte[] parentPartBinary = part.BinaryContent;
143151

144152
Logger.AddEntry(typeof(Parser), "boundary : " + boundary);
145-
string[] arrpart = Regex.Split(parentPartAsciiBody, @"\r?\n?" + Regex.Escape("--" + boundary));
153+
string[] arrpart = Regex.Split(parentPartUtf8Body, @"\r?\n?" + Regex.Escape("--" + boundary));
146154

147155
foreach (var strpart in arrpart)
148156
{
149157
if (string.IsNullOrWhiteSpace(strpart))
150158
continue;
151159

152-
int bounaryByteLen = GetASCIIByteCountOfPart(parentPartAsciiBody.Substring(0, parentPartAsciiBody.IndexOf(strpart)));
153-
int binaryPartLen = bounaryByteLen + GetASCIIByteCountOfPart(strpart);
154-
parentPartAsciiBody = null;
160+
int bounaryByteLen = GetUtf8ByteCountOfPart(parentPartUtf8Body.Substring(0, parentPartUtf8Body.IndexOf(strpart)));
161+
int binaryPartLen = bounaryByteLen + GetUtf8ByteCountOfPart(strpart);
162+
parentPartUtf8Body = null;
155163

156164
//complete Part (incl. boundary)
157165
byte[] binaryPart = new byte[binaryPartLen];
158166
Array.Copy(parentPartBinary, binaryPart, binaryPart.Length);
159167

160168
//Body only (without Boundary)
161-
byte[] binaryBody = new byte[GetASCIIByteCountOfPart(strpart)];
169+
byte[] binaryBody = new byte[GetUtf8ByteCountOfPart(strpart)];
162170
Array.Copy(binaryPart, bounaryByteLen, binaryBody, 0, binaryBody.Length);
163171

164172
//Remove Subpart from ParentPart
@@ -171,7 +179,7 @@ private static void ParseSubParts(ref MimePart part, Message message)
171179
GC.WaitForPendingFinalizers();
172180

173181
parentPartBinary = tmp;
174-
parentPartAsciiBody = ToUtf8(parentPartBinary);
182+
parentPartUtf8Body = ToUtf8(parentPartBinary);
175183
tmp = null;
176184

177185
if (!strpart.StartsWith("--") && !string.IsNullOrEmpty(strpart))
@@ -497,7 +505,7 @@ private static void ParseBody(byte[] binaryData, MimePart part, int bodyStart)
497505
{
498506
if (bodyStart < part.OriginalContent.Length)
499507
{
500-
string body = part.OriginalContent.Substring(bodyStart);
508+
var body = part.OriginalContent.Substring(bodyStart);
501509
part.BinaryContent = GetBinaryPart(binaryData, body);
502510
}
503511
}
@@ -506,23 +514,26 @@ private static void ParseBody(byte[] binaryData, MimePart part, int bodyStart)
506514
/// Parses the MIME part.
507515
/// </summary>
508516
/// <param name="binaryData">The data.</param>
517+
/// <param name="message">Message object to update</param>
509518
/// <returns></returns>
510519
public static MimePart ParseMimePart(byte[] binaryData, Message message)
511520
{
512-
MimePart part = new MimePart();
513-
part.ParentMessage = message;
514-
part.OriginalContent = ToUtf8(binaryData); //ASCII content for header parsing
521+
var part = new MimePart
522+
{
523+
ParentMessage = message,
524+
OriginalContent = ToUtf8(binaryData) //UTF8 content for header parsing
525+
};
515526

516527
try
517528
{
518529
// Separate header and body.
519-
int headerEnd = Regex.Match(part.OriginalContent, @".(?=\r?\n\r?\n)").Index + 1;
520-
int bodyStart = Regex.Match(part.OriginalContent, @"(?<=\r?\n\r?\n).").Index;
530+
var headerEnd = Regex.Match(part.OriginalContent, @".(?=\r?\n\r?\n)").Index + 1;
531+
var bodyStart = Regex.Match(part.OriginalContent, @"(?<=\r?\n\r?\n).").Index;
521532

522533
//TODO: remove this workaround
523534
if (bodyStart == 0)
524535
{
525-
int indexBody = part.OriginalContent.IndexOf("\r\n\r\n");
536+
var indexBody = part.OriginalContent.IndexOf("\r\n\r\n");
526537
if (indexBody > 0)
527538
bodyStart = indexBody;
528539
}
@@ -533,12 +544,12 @@ public static MimePart ParseMimePart(byte[] binaryData, Message message)
533544

534545
// Build the part tree.
535546
// This is a container part.
536-
if (part.ContentType.Type.ToLower().Equals("multipart"))
547+
if (part.ContentType.Type.ToLower().Trim().Equals("multipart"))
537548
{
538549
ParseSubParts(ref part, message);
539550
}
540551
// This is a nested message.
541-
else if (part.ContentType.Type.ToLower().Equals("message"))
552+
else if (part.ContentType.Type.ToLower().Trim().Equals("message"))
542553
{
543554
// TODO: Create an interpreter to this.
544555
}
@@ -560,9 +571,9 @@ public static MimePart ParseMimePart(byte[] binaryData, Message message)
560571
}
561572

562573

563-
private static byte[] GetBinaryPart(byte[] srcData, string asciiPart)
574+
private static byte[] GetBinaryPart(byte[] srcData, string utf8Part)
564575
{
565-
byte[] result = new byte[GetASCIIByteCountOfPart(asciiPart)];
576+
var result = new byte[GetUtf8ByteCountOfPart(utf8Part)];
566577
Array.Copy(srcData, (srcData.Length - result.Length), result, 0, result.Length);
567578

568579
return result;

Class Library/ActiveUp.Net.Tests/ActiveUp.Net.Tests.csproj

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,9 @@
102102
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
103103
</Content>
104104
<None Include="packages.config" />
105+
<None Include="resource\content-transfer-encode-8bit-utf8-flowed.eml">
106+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
107+
</None>
105108
<None Include="resource\confirm_read_parse_problem.eml">
106109
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
107110
</None>

Class Library/ActiveUp.Net.Tests/Common/ParserTests.cs

Lines changed: 14 additions & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)