From 290f6c794cad150c70141151487a97dbe82cab04 Mon Sep 17 00:00:00 2001
From: "Adam D. Ruppe"
Date: Fri, 1 Mar 2013 15:07:38 -0500
Subject: [PATCH] more read support

---
 email.d | 611 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 604 insertions(+), 7 deletions(-)

diff --git a/email.d b/email.d
index d4da6d9..ddc3c60 100644
--- a/email.d
+++ b/email.d
@@ -6,6 +6,8 @@ pragma(lib, "curl");
 import std.base64;
 import std.string;
 
+import arsd.characterencodings;
+
 // SEE ALSO: std.net.curl.SMTP
 
 struct RelayInfo {
@@ -14,6 +16,14 @@ struct RelayInfo {
 	string password;
 }
 
+struct MimeAttachment {
+	string type;
+	string filename;
+	const(void)[] content;
+	string id;
+}
+
+
 class EmailMessage {
 	void setHeader(string name, string value) {
 		headers ~= name ~ ": " ~ value;
@@ -47,13 +57,6 @@ class EmailMessage {
 		textBody = htmlToText(html);
 	}
 
-	struct MimeAttachment {
-		string type;
-		string filename;
-		const(void)[] content;
-		string id;
-	}
-
 	const(MimeAttachment)[] attachments;
 
 	void addAttachment(string mimeType, string filename, in void[] content, string id = null) {
@@ -230,6 +233,231 @@ void email(string to, string subject, string message, string from, RelayInfo mai
 
 import std.conv;
 
+// for reading
+/// One parsed MIME entity: raw header lines, transfer-decoded content and,
+/// for multipart types, the nested parts (in `stuff`).
+class MimePart {
+	string[] headers; // header lines, unfolded and run through decodeEncodedWord
+	immutable(ubyte)[] content; // body bytes after base64 / quoted-printable decoding
+	string textContent; // content passed through convertToUtf8; only set for text/* types
+	MimePart[] stuff; // child parts when a boundary was given
+
+	string name; // "name" parameter of Content-Type
+	string charset; // "charset" parameter of Content-Type
+	string type; // leading value of Content-Type, e.g. "text/plain"
+	string transferEncoding; // Content-Transfer-Encoding, lower-cased
+	string disposition; // leading value of Content-Disposition
+	string id; // Content-ID, as written in the header
+	string filename; // "filename" parameter of Content-Disposition
+
+	/// Copies type, filename, id and content into a MimeAttachment.
+	MimeAttachment toMimeAttachment() {
+		MimeAttachment att;
+		att.type = type;
+		att.filename = filename;
+		att.id = id;
+		att.content = content;
+		return att;
+	}
+
+	/// Parses `lines`. When `contentType` is null the headers are read from
+	/// `lines` first (up to the first empty line); otherwise `lines` is all
+	/// body and `contentType` supplies the type, charset and boundary.
+	this(immutable(ubyte)[][] lines, string contentType = null) {
+		string boundary;
+
+		void parseContentType(string content) {
+			foreach(k, v; breakUpHeaderParts(content)) {
+				switch(k) {
+					case "root":
+						type = v;
+					break;
+					case "name":
+						name = v;
+					break;
+					case "charset":
+						charset = v;
+					break;
+					case "boundary":
+						boundary = v;
+					break;
+					default:
+				}
+			}
+		}
+
+		if(contentType is null) {
+			// read headers immediately...
+			auto copyOfLines = lines;
+			immutable(ubyte)[] currentHeader;
+
+			void commitHeader() {
+				if(currentHeader.length == 0)
+					return;
+				string h = decodeEncodedWord(cast(string) currentHeader);
+				headers ~= h;
+				currentHeader = null;
+
+				auto idx = h.indexOf(":");
+				if(idx != -1) {
+					auto name = h[0 .. idx].strip.toLower;
+					auto content = h[idx + 1 .. $].strip;
+
+					switch(name) {
+						case "content-type":
+							parseContentType(content);
+						break;
+						case "content-transfer-encoding":
+							transferEncoding = content.toLower;
+						break;
+						case "content-disposition":
+							foreach(k, v; breakUpHeaderParts(content)) {
+								switch(k) {
+									case "root":
+										disposition = v;
+									break;
+									case "filename":
+										filename = v;
+									break;
+									default:
+								}
+							}
+						break;
+						case "content-id":
+							id = content;
+						break;
+						default:
+					}
+				}
+			}
+
+			foreach(line; copyOfLines) {
+				lines = lines[1 .. $];
+				if(line.length == 0)
+					break;
+
+				if(line[0] == ' ' || line[0] == '\t')
+					currentHeader ~= (cast(string) line).stripLeft();
+				else {
+					if(currentHeader.length) {
+						commitHeader();
+					}
+					currentHeader = line;
+				}
+			}
+
+			commitHeader();
+		} else {
+			parseContentType(contentType);
+		}
+
+		// if it is multipart, find the start boundary. we'll break it up and fill in stuff
+		// otherwise, all the data that follows is just content
+
+		if(boundary.length) {
+			immutable(ubyte)[][] partLines;
+			bool inPart;
+			foreach(line; lines) {
+				if(line.startsWith("--" ~ boundary)) {
+					if(inPart)
+						stuff ~= new MimePart(partLines);
+					inPart = true;
+					partLines = null;
+
+					if(line == "--" ~ boundary ~ "--")
+						break; // all done
+					continue; // the delimiter line itself is not part of the next part
+				}
+
+				if(inPart) {
+					partLines ~= line;
+				} else {
+					content ~= line ~ '\n';
+				}
+			}
+		} else {
+			foreach(line; lines) {
+				content ~= line;
+
+				if(transferEncoding != "base64")
+					content ~= '\n';
+			}
+		}
+
+		// decode the content..
+		switch(transferEncoding) {
+			case "base64":
+				content = Base64.decode(cast(string) content);
+			break;
+			case "quoted-printable":
+				content = decodeQuotedPrintable(cast(string) content);
+			break;
+			default:
+				// no change needed (I hope)
+		}
+
+		if(type.indexOf("text/") == 0) {
+			if(charset.length == 0)
+				charset = "latin1";
+			textContent = convertToUtf8(content, charset);
+		}
+	}
+}
+
+/// Splits a structured header value such as `text/plain; charset="utf-8"`
+/// into an associative array. The leading value is stored under the key
+/// "root"; each `name=value` parameter is stored under its name. Double
+/// quotes around a value are dropped; a ';' inside quotes does not split.
+string[string] breakUpHeaderParts(string headerContent) {
+	string[string] ret;
+
+	string currentName = "root";
+	string currentContent;
+	bool inQuote;
+	bool gettingName = false;
+	bool ignoringSpaces = false;
+	foreach(char c; headerContent) {
+		if(ignoringSpaces) {
+			if(c == ' ')
+				continue;
+			else
+				ignoringSpaces = false;
+		}
+
+		if(gettingName) {
+			if(c == '=') {
+				gettingName = false;
+				continue;
+			}
+			currentName ~= c;
+		}
+
+		if(c == '"') {
+			inQuote = !inQuote;
+			continue; // skip the quote character only; `break` here would stop parsing at the first quoted value
+		}
+
+		if(!inQuote && c == ';') {
+			ret[currentName] = currentContent;
+			ignoringSpaces = true;
+			currentName = null;
+			currentContent = null;
+
+			gettingName = true;
+			continue;
+		}
+
+		if(!gettingName)
+			currentContent ~= c;
+	}
+
+	if(currentName.length)
+		ret[currentName] = currentContent;
+
+	return ret;
+}
+
+// for writing
 class MimeContainer {
 	private static int sequence;
 
@@ -282,3 +510,372 @@ class MimeContainer {
 		return ret;
 	}
 }
+
+import std.algorithm : startsWith;
+/// One message read from mbox-format lines: its headers, subject, sender,
+/// text/html bodies and attachments.
+class IncomingEmailMessage {
+	/// Consumes lines from the front of `mboxLines` up to (not including)
+	/// the "From " line that starts the next message, so the caller can
+	/// construct the following message from what remains.
+	this(ref immutable(ubyte)[][] mboxLines) {
+
+		enum ParseState {
+			lookingForFrom,
+			readingHeaders,
+			readingBody
+		}
+
+		auto state = ParseState.lookingForFrom;
+		string contentType;
+
+		bool isMultipart;
+		bool isHtml;
+		immutable(ubyte)[][] mimeLines;
+
+		string charset = "latin-1";
+
+		string headerName;
+		string headerContent;
+		void commitHeader() {
+			if(headerName is null)
+				return;
+
+			headerName = headerName.toLower();
+			headerContent = headerContent.strip();
+
+			headerContent = decodeEncodedWord(headerContent);
+
+			if(headerName == "content-type") {
+				contentType = headerContent;
+				if(contentType.indexOf("multipart/") != -1)
+					isMultipart = true;
+				else if(contentType.indexOf("text/html") != -1)
+					isHtml = true;
+
+				auto charsetIdx = contentType.indexOf("charset=");
+				if(charsetIdx != -1) {
+					string cs = contentType[charsetIdx + "charset=".length .. $];
+					if(cs.length && cs[0] == '\"')
+						cs = cs[1 .. $];
+
+					auto quoteIdx = cs.indexOf("\"");
+					if(quoteIdx != -1)
+						cs = cs[0 .. quoteIdx];
+					auto semicolonIdx = cs.indexOf(";");
+					if(semicolonIdx != -1)
+						cs = cs[0 .. semicolonIdx];
+
+					cs = cs.strip();
+					if(cs.length)
+						charset = cs.toLower();
+				}
+			} else if(headerName == "from") {
+				this.from = headerContent;
+			} else if(headerName == "subject") {
+				this.subject = headerContent;
+			}
+
+			// FIXME: do I have to worry about content-transfer-encoding here? I think procmail takes care of it but I'm not entirely sure
+
+			headers[headerName] = headerContent;
+			headerName = null;
+			headerContent = null;
+		}
+
+		lineLoop: while(mboxLines.length) {
+			// this can needlessly convert headers too, but that won't harm anything since they are 7 bit anyway
+			auto line = convertToUtf8(mboxLines[0], charset);
+			line = line.stripRight;
+
+			final switch(state) {
+				case ParseState.lookingForFrom:
+					if(line.startsWith("From "))
+						state = ParseState.readingHeaders;
+				break;
+				case ParseState.readingHeaders:
+					if(line.length == 0) {
+						commitHeader();
+						state = ParseState.readingBody;
+					} else {
+						if(line[0] == ' ' || line[0] == '\t') {
+							headerContent ~= " " ~ line.stripLeft();
+						} else {
+							commitHeader();
+
+							auto idx = line.indexOf(":");
+							if(idx == -1)
+								headerName = line;
+							else {
+								headerName = line[0 .. idx];
+								headerContent = line[idx + 1 .. $].stripLeft();
+							}
+						}
+					}
+				break;
+				case ParseState.readingBody:
+					if(line.startsWith("From ")) {
+						break lineLoop; // we're at the beginning of the next message
+					}
+					if(line.startsWith(">>From") || line.startsWith(">From")) {
+						line = line[1 .. $];
+					}
+
+					if(isMultipart) {
+						mimeLines ~= mboxLines[0];
+					} else if(isHtml) {
+						// html with no alternative and no attachments
+						htmlMessageBody ~= line ~ "\n";
+					} else {
+						// plain text!
+						textMessageBody ~= line ~ "\n";
+					}
+				break;
+			}
+
+			mboxLines = mboxLines[1 .. $];
+		}
+
+		if(mimeLines.length) {
+			auto part = new MimePart(mimeLines, contentType);
+			deeperInTheMimeTree:
+			switch(part.type) {
+				case "text/html":
+					htmlMessageBody = part.textContent;
+				break;
+				case "text/plain":
+					textMessageBody = part.textContent;
+				break;
+				case "multipart/alternative":
+					foreach(p; part.stuff) {
+						if(p.type == "text/html")
+							htmlMessageBody = p.textContent;
+						else if(p.type == "text/plain")
+							textMessageBody = p.textContent;
+					}
+				break;
+				case "multipart/related":
+					// the first one is the message itself
+					// after that comes attachments that can be rendered inline
+					if(part.stuff.length) {
+						auto msg = part.stuff[0];
+						foreach(thing; part.stuff[1 .. $]) {
+							// FIXME: should this be special?
+							attachments ~= thing.toMimeAttachment();
+						}
+						part = msg;
+						goto deeperInTheMimeTree;
+					}
+				break;
+				case "multipart/mixed":
+					if(part.stuff.length) {
+						auto msg = part.stuff[0];
+						foreach(thing; part.stuff[1 .. $]) {
+							attachments ~= thing.toMimeAttachment();
+						}
+						part = msg;
+						goto deeperInTheMimeTree;
+					}
+
+					// FIXME: the more proper way is:
+					// check the disposition
+					// if none, concat it to make a text message body
+					// if inline it is prolly an image to be concated in the other body
+					// if attachment, it is an attachment
+				break;
+				default:
+					// FIXME: correctly handle more
+			}
+		}
+
+		if(htmlMessageBody.length > 0 && textMessageBody.length == 0) {
+			import arsd.htmltotext;
+			textMessageBody = htmlToText(htmlMessageBody);
+			textAutoConverted = true;
+		}
+	}
+
+	string[string] headers; // keyed by lower-cased header name
+
+	string subject;
+
+	string htmlMessageBody;
+	string textMessageBody;
+
+	string from;
+
+	bool textAutoConverted; // true when textMessageBody was produced from the html body by htmlToText
+
+	MimeAttachment[] attachments;
+}
+
+/// Range over the messages in mbox data; each popFront parses the next
+/// message from `linesRemaining` (null front / empty once no lines remain).
+struct MboxMessages {
+	immutable(ubyte)[][] linesRemaining;
+
+	this(immutable(ubyte)[] data) {
+		linesRemaining = splitLinesWithoutDecoding(data);
+		popFront();
+	}
+
+	IncomingEmailMessage currentFront;
+
+	IncomingEmailMessage front() {
+		return currentFront;
+	}
+
+	bool empty() {
+		return currentFront is null;
+	}
+
+	void popFront() {
+		if(linesRemaining.length)
+			currentFront = new IncomingEmailMessage(linesRemaining);
+		else
+			currentFront = null;
+	}
+}
+
+/// Convenience wrapper: returns MboxMessages(data).
+MboxMessages processMboxData(immutable(ubyte)[] data) {
+	return MboxMessages(data);
+}
+
+/// Splits raw bytes into lines at LF (10), dropping the LF and a CR (13)
+/// directly before it. No character decoding is done; a trailing unterminated line is kept.
+immutable(ubyte)[][] splitLinesWithoutDecoding(immutable(ubyte)[] data) {
+	immutable(ubyte)[][] ret;
+
+	size_t starting = 0;
+	bool justSaw13 = false;
+	foreach(idx, b; data) {
+		if(b == 13)
+			justSaw13 = true;
+
+		if(b == 10) {
+			auto use = idx;
+			if(justSaw13)
+				use--;
+
+			ret ~= data[starting .. use];
+			starting = idx + 1;
+		}
+
+		if(b != 13)
+			justSaw13 = false;
+	}
+
+	if(starting < data.length)
+		ret ~= data[starting .. $];
+
+	return ret;
+}
+
+/// Decodes RFC 2047 encoded words (`=?charset?encoding?text?=`) in a header
+/// value; each decoded word is passed through convertToUtf8. Text outside
+/// encoded words is kept as is. If anything is malformed or the encoding
+/// is not Q or B, the original string is returned unchanged.
+string decodeEncodedWord(string data) {
+	string originalData = data;
+
+	auto delimiter = data.indexOf("=?");
+	if(delimiter == -1)
+		return data;
+
+	string ret;
+
+	while(delimiter != -1) {
+		ret ~= data[0 .. delimiter];
+		data = data[delimiter + 2 .. $];
+
+		string charset;
+		string encoding;
+		string encodedText;
+
+		// FIXME: the insane things should probably throw an
+		// exception that keeps a copy of original data for use later
+
+		auto questionMark = data.indexOf("?");
+		if(questionMark == -1) return originalData; // not sane
+
+		charset = data[0 .. questionMark];
+		data = data[questionMark + 1 .. $];
+
+		questionMark = data.indexOf("?");
+		if(questionMark == -1) return originalData; // not sane
+
+		encoding = data[0 .. questionMark];
+		data = data[questionMark + 1 .. $];
+
+		questionMark = data.indexOf("?=");
+		if(questionMark == -1) return originalData; // not sane
+
+		encodedText = data[0 .. questionMark];
+		data = data[questionMark + 2 .. $];
+
+		delimiter = data.indexOf("=?");
+
+		immutable(ubyte)[] decodedText;
+		// RFC 2047: the encoding tag is case-insensitive, and in Q text '_' stands for a space
+		if(encoding == "Q" || encoding == "q")
+			decodedText = decodeQuotedPrintable(encodedText.tr("_", " "));
+		else if(encoding == "B" || encoding == "b")
+			decodedText = cast(typeof(decodedText)) Base64.decode(encodedText);
+		else
+			return originalData; // wtf
+
+		ret ~= convertToUtf8(decodedText, charset);
+	}
+
+	ret ~= data; // keep the rest since there could be trailing stuff
+
+	return ret;
+}
+
+/// Decodes quoted-printable text: `=XX` becomes the byte with hex value XX
+/// and `=` directly followed by '\n' (a soft line break) is removed.
+/// A character after `=` that is not a hex digit counts as 0.
+immutable(ubyte)[] decodeQuotedPrintable(string text) {
+	immutable(ubyte)[] ret;
+
+	int state = 0;
+	ubyte hexByte;
+	foreach(b; cast(immutable(ubyte)[]) text) {
+		switch(state) {
+			case 0:
+				if(b == '=') {
+					state++;
+					hexByte = 0;
+				} else
+					ret ~= b;
+			break;
+			case 1:
+				if(b == '\n') {
+					state = 0;
+					continue;
+				}
+				goto case;
+			case 2:
+				int value;
+				if(b >= '0' && b <= '9')
+					value = b - '0';
+				else if(b >= 'A' && b <= 'F')
+					value = b - 'A' + 10;
+				else if(b >= 'a' && b <= 'f')
+					value = b - 'a' + 10;
+				if(state == 1) {
+					hexByte |= value << 4;
+					state++;
+				} else {
+					hexByte |= value;
+					ret ~= hexByte;
+					state = 0;
+				}
+			break;
+			default: assert(0);
+		}
+	}
+
+	return ret;
+}