D-Scanner/stdx/d/lexer.d

1487 lines
30 KiB
D

module stdx.d.lexer;
import std.typecons;
import std.typetuple;
import std.array;
import std.algorithm;
import std.range;
import stdx.lexer;
public import stdx.lexer : StringCache;
/// Tokens with a fixed spelling (operators and punctuation). This list is
/// passed to TokenIdType, tokenStringRepresentation, TokenId and the Lexer
/// mixin below; isOperator() enumerates the same spellings.
private enum staticTokens = [
",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
"!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++",
"+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=",
"==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^",
"^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
];
/// Input prefixes that are dispatched to a dedicated lexing function. Every
/// entry here appears as a key in DLexer.pseudoTokenHandlers.
private enum pseudoTokens = [
"\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6",
"7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!",
"#line", "\u2028", "\u2029"
];
/// Keyword and special-identifier spellings. Passed as the final token list to
/// the stdx.lexer templates.
/// NOTE(review): the name suggests these fall back to the default (identifier)
/// handler when followed by more identifier characters - confirm in stdx.lexer.
private enum possibleDefaultTokens = [
"abstract", "alias", "align", "asm", "assert", "auto", "body", "bool",
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
"char", "class", "const", "continue", "creal", "dchar", "debug", "default",
"delegate", "delete", "deprecated", "do", "double", "else", "enum",
"export", "extern", "false", "final", "finally", "float", "for", "foreach",
"foreach_reverse", "function", "goto", "idouble", "if", "ifloat",
"immutable", "import", "in", "inout", "int", "interface", "invariant",
"ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow",
"null", "out", "override", "package", "pragma", "private", "protected",
"public", "pure", "real", "ref", "return", "scope", "shared", "short",
"static", "struct", "super", "switch", "synchronized", "template", "this",
"throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent",
"uint", "ulong", "union", "unittest", "ushort", "version", "void",
"volatile", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__",
"__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector",
"__VENDOR__", "__VERSION__"
];
/// Token kinds whose text is not fixed (comments, identifiers, literals,
/// whitespace, ...). Each name is listed exactly once so that every entry
/// corresponds to one distinct token id; the original list repeated
/// "scriptLine", which only produced an unused duplicate slot.
private enum dynamicTokens = [
    "specialTokenSequence", "comment", "identifier", "scriptLine",
    "whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
    "ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
    "irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
    "dstringLiteral", "stringLiteral", "wstringLiteral"
];
/// Integral type used to identify a token kind, computed by
/// stdx.lexer.TokenIdType from the three token lists above.
public alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
/// Maps an IdType value to a string (used by lexTokenString for tokens whose
/// text is null).
public alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
/// Compile-time lookup of the IdType value for a token spelling or token-kind
/// name, e.g. `tok!"identifier"` or `tok!"+="`.
public template tok(string token)
{
alias tok = TokenId!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens, token);
}
/// Source text of the extra members handed to stdx.lexer.TokenStructure: a
/// `comment` string (filled in by DLexer.popFront) and an opCmp that orders a
/// token against a size_t by comparing it with the token's `index` member.
private enum extraFields = q{
string comment;
int opCmp(size_t i) const pure nothrow @safe {
if (index < i) return -1;
if (index > i) return 1;
return 0;
}
};
/// Token type produced by DLexer.
public alias Token = stdx.lexer.TokenStructure!(IdType, extraFields);
/**
* Configure string lexing behavior.
*
* The members are bit flags; `source` is the combination of
* `includeQuoteChars` and `notEscaped`.
* NOTE(review): none of the lexing functions visible in this file branch on
* LexerConfig.stringBehavior - confirm where (or whether) it is honored.
*/
public enum StringBehavior : ubyte
{
/// Do not include quote characters, process escape sequences
compiler = 0b0000_0000,
/// Opening quotes, closing quotes, and string suffixes are included in the
/// string token
includeQuoteChars = 0b0000_0001,
/// String escape sequences are not replaced
notEscaped = 0b0000_0010,
/// Not modified at all. Useful for formatters or highlighters
source = includeQuoteChars | notEscaped
}
/**
* Configure whitespace handling behavior.
*
* Read by DLexer.popFront (to drop whitespace tokens) and by
* DLexer.lexWhitespace (to decide whether the token text is stored).
*/
public enum WhitespaceBehavior : ubyte
{
/// Whitespace is skipped
skip,
/// Whitespace is treated as a token
include
}
/**
* Configure comment handling behavior.
*
* Read by DLexer.popFront.
*/
public enum CommentBehavior : ubyte
{
/// Comments are attached to the non-whitespace token that follows them
attach,
/// Comments are tokens, and can be returned by calls to the token range's front()
include
}
/**
* Options passed to DLexer's constructor. A default-initialized LexerConfig
* selects the first member of each enum: StringBehavior.compiler,
* WhitespaceBehavior.skip and CommentBehavior.attach.
*/
public struct LexerConfig
{
/// Name of the file being lexed (not read by any code visible in this file).
string fileName;
/// See StringBehavior.
StringBehavior stringBehavior;
/// See WhitespaceBehavior.
WhitespaceBehavior whitespaceBehavior;
/// See CommentBehavior.
CommentBehavior commentBehavior;
}
/**
 * Returns: true if `type` is the token id of one of D's built-in basic type
 * keywords (integral, floating point, complex/imaginary, character, bool or
 * void), false otherwise.
 */
public bool isBasicType(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!"int", tok!"uint", tok!"double", tok!"idouble",
        tok!"float", tok!"ifloat", tok!"short", tok!"ushort",
        tok!"long", tok!"ulong", tok!"char", tok!"wchar",
        tok!"dchar", tok!"bool", tok!"void", tok!"cent",
        tok!"ucent", tok!"real", tok!"ireal", tok!"byte",
        tok!"ubyte", tok!"cdouble", tok!"cfloat", tok!"creal":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: true if `type` is one of the numeric literal token ids (integer or
 * floating point, including the imaginary variants), false otherwise.
 */
public bool isNumberLiteral(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!"doubleLiteral", tok!"floatLiteral", tok!"idoubleLiteral",
        tok!"ifloatLiteral", tok!"intLiteral", tok!"longLiteral",
        tok!"realLiteral", tok!"irealLiteral", tok!"uintLiteral",
        tok!"ulongLiteral":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: true if `type` is the id of one of the fixed-spelling operator or
 * punctuation tokens (the same spellings as in `staticTokens`), false
 * otherwise.
 */
public bool isOperator(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!",", tok!".", tok!"..", tok!"...", tok!"/", tok!"/=",
        tok!"!", tok!"!<", tok!"!<=", tok!"!<>", tok!"!<>=", tok!"!=",
        tok!"!>", tok!"!>=", tok!"$", tok!"%", tok!"%=", tok!"&",
        tok!"&&", tok!"&=", tok!"(", tok!")", tok!"*", tok!"*=",
        tok!"+", tok!"++", tok!"+=", tok!"-", tok!"--", tok!"-=",
        tok!":", tok!";", tok!"<", tok!"<<", tok!"<<=", tok!"<=",
        tok!"<>", tok!"<>=", tok!"=", tok!"==", tok!"=>", tok!">",
        tok!">=", tok!">>", tok!">>=", tok!">>>", tok!">>>=", tok!"?",
        tok!"@", tok!"[", tok!"]", tok!"^", tok!"^=", tok!"^^",
        tok!"^^=", tok!"{", tok!"|", tok!"|=", tok!"||", tok!"}",
        tok!"~", tok!"~=":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: true if `type` is the id of a keyword or special `__XXX__` token.
 * The basic type keywords are not included here; they are reported by
 * isBasicType instead.
 */
public bool isKeyword(IdType type) pure nothrow @safe
{
    switch (type)
    {
    case tok!"abstract", tok!"alias", tok!"align", tok!"asm",
        tok!"assert", tok!"auto", tok!"body", tok!"break",
        tok!"case", tok!"cast", tok!"catch", tok!"class",
        tok!"const", tok!"continue", tok!"debug", tok!"default",
        tok!"delegate", tok!"delete", tok!"deprecated", tok!"do",
        tok!"else", tok!"enum", tok!"export", tok!"extern",
        tok!"false", tok!"final", tok!"finally", tok!"for",
        tok!"foreach", tok!"foreach_reverse", tok!"function", tok!"goto",
        tok!"if", tok!"immutable", tok!"import", tok!"in",
        tok!"inout", tok!"interface", tok!"invariant", tok!"is",
        tok!"lazy", tok!"macro", tok!"mixin", tok!"module",
        tok!"new", tok!"nothrow", tok!"null", tok!"out",
        tok!"override", tok!"package", tok!"pragma", tok!"private",
        tok!"protected", tok!"public", tok!"pure", tok!"ref",
        tok!"return", tok!"scope", tok!"shared", tok!"static",
        tok!"struct", tok!"super", tok!"switch", tok!"synchronized",
        tok!"template", tok!"this", tok!"throw", tok!"true",
        tok!"try", tok!"typedef", tok!"typeid", tok!"typeof",
        tok!"union", tok!"unittest", tok!"version", tok!"volatile",
        tok!"while", tok!"with", tok!"__DATE__", tok!"__EOF__",
        tok!"__FILE__", tok!"__FUNCTION__", tok!"__gshared", tok!"__LINE__",
        tok!"__MODULE__", tok!"__parameters", tok!"__PRETTY_FUNCTION__",
        tok!"__TIME__", tok!"__TIMESTAMP__", tok!"__traits", tok!"__vector",
        tok!"__VENDOR__", tok!"__VERSION__":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: true if `type` is one of the three string literal token ids
 * (string, wstring or dstring), false otherwise.
 */
public bool isStringLiteral(IdType type) pure nothrow @safe
{
    return type == tok!"dstringLiteral"
        || type == tok!"stringLiteral"
        || type == tok!"wstringLiteral";
}
/**
 * Returns: true if `type` is the id of a protection attribute keyword
 * (export, package, private, public or protected), false otherwise.
 */
public bool isProtection(IdType type) pure nothrow @safe
{
    return type == tok!"export"
        || type == tok!"package"
        || type == tok!"private"
        || type == tok!"public"
        || type == tok!"protected";
}
public struct DLexer
{
import core.vararg;
/// Flat list of (input prefix, handler method name) pairs. The prefixes are
/// the entries of `pseudoTokens`; the names are methods of this struct. The
/// list is consumed by the Lexer mixin below.
private enum pseudoTokenHandlers = [
"\"", "lexStringLiteral",
"`", "lexWysiwygString",
"//", "lexSlashSlashComment",
"/*", "lexSlashStarComment",
"/+", "lexSlashPlusComment",
".", "lexDot",
"'", "lexCharacterLiteral",
"0", "lexNumber",
"1", "lexDecimal",
"2", "lexDecimal",
"3", "lexDecimal",
"4", "lexDecimal",
"5", "lexDecimal",
"6", "lexDecimal",
"7", "lexDecimal",
"8", "lexDecimal",
"9", "lexDecimal",
"q\"", "lexDelimitedString",
"q{", "lexTokenString",
"r\"", "lexWysiwygString",
"x\"", "lexHexString",
" ", "lexWhitespace",
"\t", "lexWhitespace",
"\r", "lexWhitespace",
"\n", "lexWhitespace",
"\u2028", "lexLongNewline",
"\u2029", "lexLongNewline",
"#!", "lexScriptLine",
"#line", "lexSpecialTokenSequence"
];
// Pulls in the generic lexer machinery from stdx.lexer; lexIdentifier is
// passed as the default handler. Members used in this struct but not declared
// here (range, _front, _popFront, advance, front, empty) are presumably
// provided by this mixin - verify against stdx.lexer.
mixin Lexer!(IdType, Token, lexIdentifier, staticTokens,
dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens);
/**
* Creates a lexer over `range` and immediately lexes the first token.
* Params:
*     range = the source bytes to lex
*     config = lexer options (copied into this lexer)
*     cache = string cache used to store token text; only the pointer is kept
*/
this(ubyte[] range, const LexerConfig config, StringCache* cache)
{
this.range = LexerRange(range);
this.config = config;
this.cache = cache;
// Prime `front` with the first token.
popFront();
}
/**
 * Returns: true if `comment` begins with one of the documentation comment
 * openers `///`, `/**` or `/++`; false otherwise (including when the text is
 * shorter than three characters).
 */
private static bool isDocComment(string comment) pure nothrow @safe
{
    if (comment.length < 3)
        return false;
    immutable opener = comment[0 .. 3];
    return opener == "///" || opener == "/**" || opener == "/++";
}
/**
* Advances to the next token, applying the configured comment and whitespace
* policies.
*
* With CommentBehavior.attach, comment tokens are consumed here and the text
* of documentation comments is stored in the `comment` field of the token
* that ends up at the front. With WhitespaceBehavior.skip, whitespace tokens
* are consumed as well. The two cases jump to each other via `goto case` so
* that alternating runs of comments and whitespace are all handled.
*/
public void popFront() pure
{
_popFront();
string comment = null;
switch (front.type)
{
case tok!"comment":
if (config.commentBehavior == CommentBehavior.attach)
{
import std.string;
// Only documentation comments are kept; several are joined with '\n'.
if (isDocComment(front.text))
comment = comment == null ? front.text : format("%s\n%s", comment, front.text);
// NOTE(review): comment tokens consumed by this loop (i.e. comments that
// directly follow another comment with no whitespace token between them)
// are not added to `comment`.
do _popFront(); while (front == tok!"comment");
if (front == tok!"whitespace") goto case tok!"whitespace";
}
break;
case tok!"whitespace":
if (config.whitespaceBehavior == WhitespaceBehavior.skip)
{
do _popFront(); while (front == tok!"whitespace");
if (front == tok!"comment") goto case tok!"comment";
}
break;
default:
break;
}
// Attach the collected documentation text (or null) to the new front token.
_front.comment = comment;
}
/**
* Reports whether the byte at the front of the input starts a whitespace
* character: space, CR, LF, tab, or - when the front byte is 0xE2 - the
* bytes 0x80 0xA8 / 0x80 0xA9 as read from `range.peek(2)`.
*
* The range must not be empty.
*
* NOTE(review): this function indexes the peeked bytes from 0 and requires a
* length of exactly 2, whereas lexWhitespace and popFrontWhitespaceAware call
* `range.peek(3)`, require length 3 and index from 1, and lexNumber reads
* `range.peek(1)[1]`. These conventions cannot all match LexerRange.peek;
* confirm against stdx.lexer which one is right.
*/
bool isWhitespace() pure /*const*/ nothrow
{
switch (range.front)
{
case ' ':
case '\r':
case '\n':
case '\t':
return true;
case 0xe2:
// Possible first byte of a UTF-8 encoded U+2028 / U+2029.
auto peek = range.peek(2);
return peek.length == 2
&& peek[0] == 0x80
&& (peek[1] == 0xa8 || peek[1] == 0xa9);
default:
return false;
}
}
/**
 * Consumes one character from the input, keeping the line counter up to date.
 *
 * "\r\n", a lone '\r', a lone '\n' and the three-byte sequences
 * E2 80 A8 / E2 80 A9 are each consumed as a unit and followed by a call to
 * `range.incrementLine()`. Any other byte is consumed on its own. The range
 * must not be empty.
 */
void popFrontWhitespaceAware() pure nothrow
{
    immutable ubyte current = range.front;
    if (current == '\r')
    {
        range.popFront();
        // Treat "\r\n" as a single line break.
        if (!range.empty && range.front == '\n')
            range.popFront();
        range.incrementLine();
    }
    else if (current == '\n')
    {
        range.popFront();
        range.incrementLine();
    }
    else if (current == 0xe2)
    {
        auto lookahead = range.peek(3);
        immutable bool lineSeparator = lookahead.length == 3
            && lookahead[1] == 0x80
            && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9);
        range.popFront();
        if (lineSeparator)
        {
            range.popFront();
            range.popFront();
            range.incrementLine();
        }
    }
    else
        range.popFront();
}
/**
* Lexes a run of whitespace (spaces, tabs, CR, LF, CRLF and the three-byte
* sequences E2 80 A8 / E2 80 A9) into a single `whitespace` token, calling
* `range.incrementLine()` once per line break.
*
* Returns: a whitespace token whose text is null when
* WhitespaceBehavior.skip is configured, otherwise the cached source text.
*/
Token lexWhitespace() pure nothrow
{
mixin (tokenStart);
loop: do
{
switch (range.front)
{
case '\r':
range.popFront();
// "\r\n" counts as one line break.
if (!range.empty && range.front == '\n')
range.popFront();
range.incrementLine();
break;
case '\n':
range.popFront();
range.incrementLine();
break;
case ' ':
case '\t':
range.popFront();
break;
case 0xe2:
// Possible UTF-8 encoded U+2028 / U+2029; anything else ends the run.
// NOTE(review): the length/index convention used with peek here differs
// from isWhitespace and lexNumber - confirm against LexerRange.peek.
auto lookahead = range.peek(3);
if (lookahead.length != 3)
break loop;
if (lookahead[1] != 0x80)
break loop;
if (lookahead[2] == 0xa8 || lookahead[2] == 0xa9)
{
range.popFront();
range.popFront();
range.popFront();
range.incrementLine();
break;
}
break loop;
default:
break loop;
}
} while (!range.empty);
// Skip the cache lookup when the token is going to be discarded anyway.
string text = config.whitespaceBehavior == WhitespaceBehavior.skip
? null : cache.cacheGet(range.slice(mark));
return Token(tok!"whitespace", text, line, column, index);
}
/**
 * Lexes a numeric literal, choosing the hexadecimal, binary or decimal lexer
 * from the prefix: "0x"/"0X" selects lexHex, "0b"/"0B" selects lexBinary and
 * everything else is handed to lexDecimal. For the prefixed forms the two
 * prefix bytes are consumed before delegating.
 */
Token lexNumber() pure nothrow
{
    mixin (tokenStart);
    // Without a leading '0' followed by at least one more byte there can be
    // no radix prefix.
    if (!(range.canPeek(1) && range.front == '0'))
        return lexDecimal(mark, line, column, index);
    switch (range.peek(1)[1])
    {
    case 'x', 'X':
        range.popFront();
        range.popFront();
        return lexHex(mark, line, column, index);
    case 'b', 'B':
        range.popFront();
        range.popFront();
        return lexBinary(mark, line, column, index);
    default:
        return lexDecimal(mark, line, column, index);
    }
}
/// Lexes a hexadecimal literal starting at the current position.
Token lexHex() pure nothrow
{
mixin (tokenStart);
return lexHex(mark, line, column, index);
}
/**
* Lexes the body of a hexadecimal literal. lexNumber calls this after it has
* consumed the "0x" prefix; `mark`, `line`, `column` and `index` describe
* where the token started.
*
* Consumes hex digits and underscores, at most one '.', and then an optional
* integer suffix, float suffix or 'p'/'P' exponent.
*
* Returns: an intLiteral-family token, or a floating point token once a '.'
* has been consumed.
*/
Token lexHex(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
IdType type = tok!"intLiteral";
bool foundDot;
hexLoop: while (!range.empty)
{
switch (range.front)
{
case 'a': .. case 'f':
case 'A': .. case 'F':
case '0': .. case '9':
case '_':
range.popFront();
break;
case 'u':
case 'U':
lexIntSuffix(type);
break hexLoop;
case 'i':
// An 'i' suffix is only consumed after a '.' was seen.
if (foundDot)
lexFloatSuffix(type);
break hexLoop;
case 'L':
if (foundDot)
lexFloatSuffix(type);
else
lexIntSuffix(type);
break hexLoop;
case 'p':
case 'P':
lexExponent(type);
break hexLoop;
case '.':
if (foundDot)
break hexLoop;
// Intended to stop before a ".." (slice) operator.
// NOTE(review): this reads element [0] of peek(1) while lexNumber reads
// element [1] of peek(1) for "the byte after front". If peek includes the
// front byte, this condition is always true here - confirm against
// LexerRange.peek.
if (range.peek(1).length && range.peek(1)[0] == '.')
break hexLoop;
range.popFront();
foundDot = true;
type = tok!"doubleLiteral";
break;
default:
break hexLoop;
}
}
return Token(type, cache.cacheGet(range.slice(mark)), line, column,
index);
}
/// Lexes a binary literal starting at the current position.
Token lexBinary() pure nothrow
{
    mixin (tokenStart);
    return lexBinary(mark, line, column, index);
}

/**
 * Lexes the body of a binary literal. lexNumber calls this after it has
 * consumed the "0b" prefix; `mark`, `line`, `column` and `index` describe
 * where the token started.
 *
 * Consumes a run of '0', '1' and '_' followed by an optional integer suffix.
 */
Token lexBinary(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
    IdType type = tok!"intLiteral";
    // Digits and separators.
    while (!range.empty
        && (range.front == '0' || range.front == '1' || range.front == '_'))
    {
        range.popFront();
    }
    // Optional integer suffix.
    if (!range.empty
        && (range.front == 'u' || range.front == 'U' || range.front == 'L'))
    {
        lexIntSuffix(type);
    }
    return Token(type, cache.cacheGet(range.slice(mark)), line, column,
        index);
}
/// Lexes a decimal literal starting at the current position.
Token lexDecimal() pure nothrow
{
mixin (tokenStart);
return lexDecimal(mark, line, column, index);
}
/**
* Lexes a decimal integer or floating point literal whose start is described
* by `mark`, `line`, `column` and `index`.
*
* Consumes digits and underscores, at most one '.', and then an optional
* integer suffix, float suffix or 'e'/'E' exponent. A literal that begins
* with '.' starts out as a doubleLiteral.
*/
Token lexDecimal(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
bool foundDot = range.front == '.';
IdType type = tok!"intLiteral";
if (foundDot)
{
range.popFront();
type = tok!"doubleLiteral";
}
decimalLoop: while (!range.empty)
{
switch (range.front)
{
case '0': .. case '9':
case '_':
range.popFront();
break;
case 'u':
case 'U':
// 'u' is only a valid suffix on integers; after a '.' it ends the token.
if (!foundDot)
lexIntSuffix(type);
break decimalLoop;
case 'i':
lexFloatSuffix(type);
break decimalLoop;
case 'L':
if (foundDot)
lexFloatSuffix(type);
else
lexIntSuffix(type);
break decimalLoop;
case 'f':
case 'F':
lexFloatSuffix(type);
break decimalLoop;
case 'e':
case 'E':
lexExponent(type);
break decimalLoop;
case '.':
if (foundDot)
break decimalLoop;
// Intended to stop before a ".." (slice) operator.
// NOTE(review): this expects peek(2) to yield exactly two bytes with the
// byte after '.' at index 1, while lexNumber reads peek(1)[1] for the same
// purpose - confirm against LexerRange.peek.
auto lookahead = range.peek(2);
if (lookahead.length == 2 && lookahead[1] == '.')
break decimalLoop;
else
{
// The following bit of silliness tries to tell the
// difference between "int dot identifier" and
// "double identifier".
if (lookahead.length == 2)
{
switch (lookahead[1])
{
case '0': .. case '9':
// Jumps into the else-branch below to consume the '.'.
goto doubleLiteral;
default:
break decimalLoop;
}
}
else
{
doubleLiteral:
range.popFront();
foundDot = true;
type = tok!"doubleLiteral";
}
}
break;
default:
break decimalLoop;
}
}
return Token(type, cache.cacheGet(range.slice(mark)), line, column,
index);
}
/**
 * Consumes an integer literal suffix and updates `type` accordingly.
 *
 * At most one unsigned marker ('u' or 'U') and at most one long marker ('L',
 * or 'l' as the original implementation also accepted) are consumed, in
 * either order. Input that ends in the middle of, or right after, the suffix
 * is handled without reading past the end of the range (the original read
 * `range.front` unconditionally after each `popFront`), and a third suffix
 * letter such as the trailing 'u' in "1uLu" is no longer swallowed.
 *
 * Params:
 *     type = in: the literal type determined so far; out: uintLiteral,
 *            longLiteral or ulongLiteral if a suffix was consumed, otherwise
 *            unchanged.
 */
void lexIntSuffix(ref IdType type) pure nothrow @safe
{
    bool sawUnsigned = false;
    bool sawLong = false;
    // A suffix is at most two letters long.
    foreach (pass; 0 .. 2)
    {
        if (range.empty)
            break;
        immutable ubyte c = range.front;
        if (!sawUnsigned && (c == 'u' || c == 'U'))
            sawUnsigned = true;
        else if (!sawLong && (c == 'L' || c == 'l'))
            sawLong = true;
        else
            break;
        range.popFront();
    }
    if (sawUnsigned && sawLong)
        type = tok!"ulongLiteral";
    else if (sawUnsigned)
        type = type == tok!"intLiteral" ? tok!"uintLiteral" : tok!"ulongLiteral";
    else if (sawLong)
        type = type == tok!"uintLiteral" ? tok!"ulongLiteral" : tok!"longLiteral";
}
/**
 * Consumes a floating point literal suffix and updates `type`.
 *
 * 'f' / 'F' selects floatLiteral and 'L' selects realLiteral (the original
 * mapped 'L' to doubleLiteral, so `1.0L` was reported as a double and `1.0Li`
 * as an idouble). A following 'i' turns the literal into the matching
 * imaginary type; with no size suffix that is idoubleLiteral. Calling this
 * on exhausted input is a no-op.
 *
 * Params:
 *     type = in: the literal type determined so far; out: the type implied
 *            by the suffix, or unchanged if there is no suffix.
 */
void lexFloatSuffix(ref IdType type) pure nothrow @safe
{
    if (range.empty)
        return;
    switch (range.front)
    {
    case 'L':
        range.popFront();
        type = tok!"realLiteral";
        break;
    case 'f':
    case 'F':
        range.popFront();
        type = tok!"floatLiteral";
        break;
    default:
        break;
    }
    if (!range.empty && range.front == 'i')
    {
        warning("Complex number literals are deprecated");
        range.popFront();
        if (type == tok!"floatLiteral")
            type = tok!"ifloatLiteral";
        else if (type == tok!"realLiteral")
            type = tok!"irealLiteral";
        else
            type = tok!"idoubleLiteral";
    }
}
/**
 * Consumes the exponent part of a floating point literal. The range must be
 * positioned on the exponent marker ('e', 'E', 'p' or 'P'), which is consumed
 * first, followed by an optional sign, the exponent digits and an optional
 * float suffix.
 *
 * Differences from the original implementation:
 * $(UL
 *   $(LI a literal with an exponent is floating point, so `type` becomes
 *        doubleLiteral before any suffix is applied (`1e5` used to stay an
 *        intLiteral);)
 *   $(LI a sign is only accepted directly after the marker; previously the
 *        '+' in `1e5+3` was consumed as part of the number;)
 *   $(LI a missing exponent is also reported when the input ends.)
 * )
 *
 * Params:
 *     type = receives the floating point type of the literal.
 */
void lexExponent(ref IdType type) pure nothrow @safe
{
    range.popFront();
    type = tok!"doubleLiteral";
    bool foundSign = false;
    bool foundDigit = false;
    while (!range.empty)
    {
        switch (range.front)
        {
        case '-':
        case '+':
            // A sign after the first sign or after any digit belongs to the
            // next token.
            if (foundSign || foundDigit)
            {
                if (!foundDigit)
                    error("Expected an exponent");
                return;
            }
            foundSign = true;
            range.popFront();
            break;
        case '0': .. case '9':
        case '_':
            foundDigit = true;
            range.popFront();
            break;
        case 'L':
        case 'f':
        case 'F':
        case 'i':
            lexFloatSuffix(type);
            return;
        default:
            if (!foundDigit)
                error("Expected an exponent");
            return;
        }
    }
    if (!foundDigit)
        error("Expected an exponent");
}
/**
 * Lexes a "#!" script line: everything from the current position up to, but
 * not including, the next line break (or the end of input) becomes one
 * `scriptLine` token.
 */
Token lexScriptLine() pure
{
    mixin (tokenStart);
    for (;;)
    {
        if (range.empty || isNewline())
            break;
        range.popFront();
    }
    auto text = cache.cacheGet(range.slice(mark));
    return Token(tok!"scriptLine", text, line, column, index);
}
/**
 * Lexes a "#line" special token sequence: everything from the current
 * position up to, but not including, the next line break (or the end of
 * input) becomes one `specialTokenSequence` token.
 */
Token lexSpecialTokenSequence() pure
{
    mixin (tokenStart);
    for (;;)
    {
        if (range.empty || isNewline())
            break;
        range.popFront();
    }
    auto text = cache.cacheGet(range.slice(mark));
    return Token(tok!"specialTokenSequence", text, line, column, index);
}
/**
 * Lexes a block comment. The two-byte opener is consumed first, then input is
 * consumed up to and including the first "*" "/" pair, or to the end of input
 * if the comment is never closed. Line breaks inside the comment advance the
 * line counter.
 */
Token lexSlashStarComment() pure
{
    mixin (tokenStart);
    // Skip the opener.
    range.popFront();
    range.popFront();
    bool closed = false;
    while (!closed && !range.empty)
    {
        if (range.front != '*')
        {
            popFrontWhitespaceAware();
            continue;
        }
        range.popFront();
        if (!range.empty && range.front == '/')
        {
            range.popFront();
            closed = true;
        }
    }
    return Token(tok!"comment", cache.cacheGet(range.slice(mark)), line,
        column, index);
}
/**
 * Lexes a line comment: the two-byte opener plus everything up to, but not
 * including, the next '\r' or '\n' (or the end of input).
 */
Token lexSlashSlashComment() pure nothrow
{
    mixin (tokenStart);
    // Skip the opener.
    range.popFront();
    range.popFront();
    while (!range.empty && range.front != '\r' && range.front != '\n')
        range.popFront();
    return Token(tok!"comment", cache.cacheGet(range.slice(mark)), line,
        column, index);
}
/**
 * Lexes a nesting comment. The two-byte opener is consumed first; after that
 * every "/" "+" pair increases the nesting depth and every "+" "/" pair
 * decreases it. Lexing stops when the depth reaches zero or the input ends.
 * Line breaks inside the comment advance the line counter.
 */
Token lexSlashPlusComment() pure nothrow
{
    mixin (tokenStart);
    // Skip the opener.
    range.popFront();
    range.popFront();
    int nesting = 1;
    while (nesting > 0 && !range.empty)
    {
        immutable ubyte first = range.front;
        if (first != '+' && first != '/')
        {
            popFrontWhitespaceAware();
            continue;
        }
        range.popFront();
        if (range.empty)
            continue;
        if (first == '+' && range.front == '/')
        {
            range.popFront();
            nesting--;
        }
        else if (first == '/' && range.front == '+')
        {
            range.popFront();
            nesting++;
        }
    }
    return Token(tok!"comment", cache.cacheGet(range.slice(mark)), line,
        column, index);
}
/**
 * Lexes a double-quoted string literal, including escape sequences and an
 * optional 'c' / 'w' / 'd' suffix.
 *
 * Ordinary characters are consumed with popFrontWhitespaceAware so that line
 * breaks inside a multi-line string advance the line counter, as
 * lexWysiwygString already does. (The original used a plain `popFront`,
 * leaving every token after a multi-line string with a wrong line number.)
 *
 * Returns: the string token, or a default-constructed Token if the input ends
 * before the closing quote.
 */
Token lexStringLiteral() pure nothrow
{
    mixin (tokenStart);
    // Skip the opening quote.
    range.popFront();
    while (true)
    {
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token();
        }
        else if (range.front == '"')
        {
            range.popFront();
            break;
        }
        else if (range.front == '\\')
        {
            lexEscapeSequence();
        }
        else
            popFrontWhitespaceAware();
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.cacheGet(range.slice(mark)), line, column,
        index);
}
/**
 * Lexes a WYSIWYG string: either a backtick-delimited string or an r-prefixed
 * double-quoted string. No escape processing takes place; everything up to
 * the matching terminator is taken verbatim, followed by an optional string
 * suffix.
 *
 * Returns: the string token, or `Token(tok!"")` if the input ends before the
 * terminator.
 */
Token lexWysiwygString() pure nothrow
{
    mixin (tokenStart);
    IdType type = tok!"stringLiteral";
    immutable ubyte terminator = range.front == '`' ? '`' : '"';
    if (terminator == '`')
        range.popFront();
    else
    {
        // Skip the 'r' and then the opening quote.
        range.popFront();
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token(tok!"");
        }
        range.popFront();
    }
    for (;;)
    {
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token(tok!"");
        }
        if (range.front == terminator)
        {
            range.popFront();
            break;
        }
        popFrontWhitespaceAware();
    }
    lexStringSuffix(type);
    return Token(type, cache.cacheGet(range.slice(mark)), line, column,
        index);
}
/**
 * Consumes an optional string literal suffix and sets `type` to match:
 * 'w' gives wstringLiteral, 'd' gives dstringLiteral, and 'c' or no suffix
 * gives stringLiteral. Nothing is consumed when there is no suffix.
 */
void lexStringSuffix(ref IdType type) pure nothrow
{
    type = tok!"stringLiteral";
    if (range.empty)
        return;
    immutable ubyte suffix = range.front;
    if (suffix == 'w')
        type = tok!"wstringLiteral";
    else if (suffix == 'd')
        type = tok!"dstringLiteral";
    else if (suffix != 'c')
        return;
    range.popFront();
}
/**
 * Lexes a delimited string literal (one that starts with q followed by a
 * double quote).
 *
 * If the character after the opener is one of the nesting delimiters
 * '<', '{', '[' or '(', the literal is handled by lexNormalDelimitedString
 * with the matching closing delimiter; otherwise it is treated as a heredoc
 * and handled by lexHeredocString.
 *
 * Input that ends right after the opener is now reported as an error instead
 * of reading `range.front` on an empty range.
 *
 * Returns: the string token, or `Token(tok!"")` on error.
 */
Token lexDelimitedString() pure nothrow
{
    mixin (tokenStart);
    // Skip the 'q' and the opening quote.
    range.popFront();
    range.popFront();
    if (range.empty)
    {
        error("Error: unterminated delimited string literal");
        return Token(tok!"");
    }
    ubyte close;
    switch (range.front)
    {
    case '<': close = '>'; break;
    case '{': close = '}'; break;
    case '[': close = ']'; break;
    case '(': close = ')'; break;
    default:
        return lexHeredocString(mark, line, column, index);
    }
    immutable ubyte open = range.front;
    range.popFront();
    return lexNormalDelimitedString(mark, line, column, index, open, close);
}
/**
 * Lexes the remainder of a delimited string that uses nesting delimiters.
 * The opening delimiter has already been consumed by the caller.
 *
 * Params:
 *     mark = start-of-token mark used to slice the token text
 *     line = line on which the token started
 *     column = column at which the token started
 *     index = byte index at which the token started
 *     open = the opening delimiter; each occurrence increases the nesting depth
 *     close = the closing delimiter; each occurrence decreases the nesting depth
 *
 * The closing delimiter that brings the depth to zero must be followed by a
 * double quote. Input that ends immediately after that delimiter is now
 * reported through the same error path instead of reading `range.front` on an
 * empty range, and input that ends before the delimiters balance is reported
 * as unterminated.
 *
 * Returns: the string token, or `Token(tok!"")` if the closing quote is
 * missing.
 */
Token lexNormalDelimitedString(size_t mark, size_t line, size_t column,
    size_t index, ubyte open, ubyte close)
    pure nothrow
{
    int depth = 1;
    while (!range.empty && depth > 0)
    {
        if (range.front == open)
        {
            depth++;
            range.popFront();
        }
        else if (range.front == close)
        {
            depth--;
            range.popFront();
            if (depth <= 0)
            {
                if (!range.empty && range.front == '"')
                    range.popFront();
                else
                {
                    error("Error: \" expected to end delimited string literal");
                    return Token(tok!"");
                }
            }
        }
        else
            popFrontWhitespaceAware();
    }
    // The loop can only leave depth > 0 when the input ran out.
    if (depth > 0)
        error("Error: unterminated delimited string literal");
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.cacheGet(range.slice(mark)), line, column, index);
}
/**
 * Lexes the remainder of a heredoc-style delimited string. The opener has
 * already been consumed by the caller; the range is positioned on the heredoc
 * identifier.
 *
 * The identifier is read with lexIdentifier and must be followed by a line
 * break. Input is then consumed until a line break is followed by the
 * identifier text, after which a closing double quote and an optional string
 * suffix are expected.
 *
 * Every read of the front of the range is now guarded by an emptiness check;
 * the original called isNewline() and `range.front` on possibly exhausted
 * input. The unused `std.regex` import was dropped.
 *
 * Params:
 *     mark = start-of-token mark used to slice the token text
 *     line = line on which the token started
 *     column = column at which the token started
 *     index = byte index at which the token started
 */
Token lexHeredocString(size_t mark, size_t line, size_t column, size_t index)
    pure nothrow
{
    Token ident = lexIdentifier();
    if (!range.empty && isNewline())
        popFrontWhitespaceAware();
    else
        error("Newline expected");
    while (!range.empty)
    {
        if (isNewline())
        {
            popFrontWhitespaceAware();
            // NOTE(review): relies on peek(n) returning exactly n bytes
            // starting at the front - confirm against LexerRange.peek.
            if (range.peek(ident.text.length) == ident.text)
            {
                foreach (i ; 0 .. ident.text.length)
                    range.popFront();
                break;
            }
        }
        else
            range.popFront();
    }
    if (!range.empty && range.front == '"')
        range.popFront();
    else
        error(`" expected`);
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.cacheGet(range.slice(mark)), line, column, index);
}
/**
* Lexes a token string (q followed by an opening brace) by running the lexer
* itself over the contents until the braces balance, and concatenating the
* text of every token seen.
*
* While the contents are lexed, the configuration is temporarily switched to
* include whitespace and comments and to leave strings unmodified; the
* previous configuration is restored on exit.
*
* Returns: a string literal token whose text is the reassembled "q{...}"
* source, with the type adjusted by any string suffix.
*/
Token lexTokenString() pure
{
mixin (tokenStart);
assert(range.front == 'q');
range.popFront();
assert(range.front == '{');
range.popFront();
auto app = appender!string();
app.put("q{");
int depth = 1;
// Save the configuration; it is restored when this function returns.
LexerConfig c = config;
scope(exit) config = c;
config.whitespaceBehavior = WhitespaceBehavior.include;
config.stringBehavior = StringBehavior.source;
config.commentBehavior = CommentBehavior.include;
_front = advance();
while (depth > 0 && !empty)
{
auto t = front();
// Tokens without stored text are rendered from their token id.
if (t.text is null)
app.put(str(t.type));
else
app.put(t.text);
if (t.type == tok!"}")
{
depth--;
// The final closing brace is not popped, so the range stays positioned
// just after it for the suffix check below.
if (depth > 0)
popFront();
}
else if (t.type == tok!"{")
{
depth++;
popFront();
}
else
popFront();
}
IdType type = tok!"stringLiteral";
lexStringSuffix(type);
return Token(type, cache.cacheGet(cast(const(ubyte)[]) app.data), line,
column, index);
}
/**
 * Lexes a hex string literal (x followed by a double-quoted run of hex digits
 * and whitespace), plus an optional string suffix.
 *
 * Returns: the string token, or a default-constructed Token if the input ends
 * before the closing quote or an invalid character is found.
 */
Token lexHexString() pure nothrow
{
    mixin (tokenStart);
    // Skip the 'x' and the opening quote.
    range.popFront();
    range.popFront();
    for (;;)
    {
        if (range.empty)
        {
            error("Error: unterminated hex string literal");
            return Token();
        }
        if (isWhitespace())
        {
            popFrontWhitespaceAware();
            continue;
        }
        immutable ubyte c = range.front;
        if (c == '"')
        {
            range.popFront();
            break;
        }
        immutable bool hexDigit = (c >= '0' && c <= '9')
            || (c >= 'A' && c <= 'F')
            || (c >= 'a' && c <= 'f');
        if (!hexDigit)
        {
            error("Error: invalid character in hex string");
            return Token();
        }
        range.popFront();
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.cacheGet(range.slice(mark)), line, column,
        index);
}
/**
 * Consumes exactly `count` hexadecimal digits.
 *
 * Params:
 *     count = number of digits required
 *     message = error text reported if a digit is missing
 * Returns: true if all digits were consumed, false (after reporting
 * `message`) if the input ended or a non-hex byte was found.
 */
private bool popHexDigits(size_t count, string message) pure nothrow
{
    foreach (i; 0 .. count)
    {
        if (range.empty)
        {
            error(message);
            return false;
        }
        switch (range.front)
        {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            range.popFront();
            break;
        default:
            error(message);
            return false;
        }
    }
    return true;
}

/**
 * Consumes one escape sequence. The range must be positioned on the
 * backslash.
 *
 * Handles the single-character escapes, `\x` followed by two hex digits,
 * octal escapes starting with 1-7 (up to three octal digits), `\u` with four
 * and `\U` with eight hex digits, and - for anything else - a named entity
 * that runs up to and including the next ';'.
 *
 * Differences from the original: `\x` now consumes its two hex digits (it
 * used to consume only the 'x'), and the terminating ';' of a named entity
 * is consumed. Both are needed for lexCharacterLiteral, which expects the
 * closing quote directly after the escape sequence.
 *
 * Returns: true if a complete escape sequence was consumed, false otherwise
 * (an error has been reported in that case).
 */
bool lexEscapeSequence() pure nothrow
{
    // Skip the backslash.
    range.popFront();
    if (range.empty)
    {
        error("Error: non-terminated character escape sequence.");
        return false;
    }
    switch (range.front)
    {
    case '\'':
    case '"':
    case '?':
    case '\\':
    case '0':
    case 'a':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
    case 'v':
        range.popFront();
        break;
    case 'x':
        range.popFront();
        if (!popHexDigits(2, "Error: at least 2 hex digits expected."))
            return false;
        break;
    case '1': .. case '7':
        for (size_t i = 0; i < 3 && !range.empty && range.front >= '0' && range.front <= '7'; i++)
            range.popFront();
        break;
    case 'u':
        range.popFront();
        if (!popHexDigits(4, "Error: at least 4 hex digits expected."))
            return false;
        break;
    case 'U':
        range.popFront();
        if (!popHexDigits(8, "Error: at least 8 hex digits expected."))
            return false;
        break;
    default:
        // Named character entity: consume through the terminating ';'.
        while (true)
        {
            if (range.empty)
            {
                error("Error: non-terminated character escape sequence.");
                return false;
            }
            immutable bool terminator = range.front == ';';
            range.popFront();
            if (terminator)
                break;
        }
        break;
    }
    return true;
}
/**
 * Lexes a character literal: an opening single quote, then either an escape
 * sequence, a run of bytes with the high bit set (a multi-byte character), or
 * a single other character, then a closing single quote. An empty literal
 * (two adjacent single quotes) is also accepted, as before.
 *
 * Every read of the front of the range is now guarded against exhausted
 * input; the original could read past the end when the source stopped inside
 * the literal, including in the loop over high-bit bytes.
 *
 * Returns: the characterLiteral token, or a default-constructed Token if the
 * literal is not properly terminated.
 */
Token lexCharacterLiteral() pure nothrow
{
    mixin (tokenStart);
    // Skip the opening quote.
    range.popFront();
    if (range.empty)
    {
        error("Error: unterminated character literal");
        return Token();
    }
    if (range.front == '\\')
        lexEscapeSequence();
    else if (range.front == '\'')
    {
        range.popFront();
        return Token(tok!"characterLiteral", cache.cacheGet(range.slice(mark)),
            line, column, index);
    }
    else if (range.front & 0x80)
    {
        while (!range.empty && (range.front & 0x80))
            range.popFront();
    }
    else
        popFrontWhitespaceAware();

    if (!range.empty && range.front == '\'')
    {
        range.popFront();
        return Token(tok!"characterLiteral", cache.cacheGet(range.slice(mark)),
            line, column, index);
    }
    if (range.empty)
        error("Error: Expected ' to end character literal ");
    else
        error("Error: Expected ' to end character literal ", cast(char) range.front);
    return Token();
}
/**
* Lexes an identifier: consumes bytes until the input ends or isSeparating(0)
* reports a separator. This is the default handler passed to the Lexer mixin.
*
* The StringCache hash of the text is computed incrementally while scanning
* and handed to cacheGet together with the slice.
*/
Token lexIdentifier() pure nothrow
{
mixin (tokenStart);
uint hash = 0;
while (!range.empty && !isSeparating(0))
{
hash = StringCache.hashStep(range.front, hash);
range.popFront();
}
return Token(tok!"identifier", cache.cacheGet(range.slice(mark), hash), line,
column, index);
}
/**
* Lexes a token that starts with '.': a number such as ".5" (delegated to
* lexNumber), the "..." token, the ".." token, or a single ".".
*
* NOTE(review): this treats `range.peek(1)[0]` as the byte after the front,
* while lexNumber uses `range.peek(1)[1]` for that. If peek includes the front
* byte, lookahead[0] is always '.' here - confirm against LexerRange.peek.
*/
Token lexDot() pure nothrow
{
mixin (tokenStart);
auto lookahead = range.peek(1);
if (lookahead.length == 0)
{
range.popFront();
return Token(tok!".", null, line, column, index);
}
switch (lookahead[0])
{
case '0': .. case '9':
return lexNumber();
case '.':
range.popFront();
range.popFront();
// A third dot makes it "...".
if (!range.empty && range.front == '.')
{
range.popFront();
return Token(tok!"...", null, line, column, index);
}
else
return Token(tok!"..", null, line, column, index);
default:
range.popFront();
return Token(tok!".", null, line, column, index);
}
}
/**
* Lexes a three-byte line break (registered in pseudoTokenHandlers for
* "\u2028" and "\u2029") as a `whitespace` token and advances the line
* counter. Unlike lexWhitespace, the token text is always stored.
*/
Token lexLongNewline() pure nothrow
{
mixin (tokenStart);
// The UTF-8 encodings of U+2028 and U+2029 are three bytes long.
range.popFront();
range.popFront();
range.popFront();
range.incrementLine();
return Token(tok!"whitespace", cache.cacheGet(range.slice(mark)), line,
column, index);
}
/**
 * Returns: true if the input at the current position starts a line break:
 * '\n', '\r', or bytes from `range.peek(3)` equal to "\u2028" or "\u2029".
 * The range must not be empty.
 */
bool isNewline() pure @safe nothrow
{
    immutable ubyte current = range.front;
    if (current == '\n' || current == '\r')
        return true;
    auto lookahead = range.peek(3);
    return lookahead.length != 0
        && (lookahead == "\u2028" || lookahead == "\u2029");
}
/**
 * Reports whether the byte `offset` positions ahead of the current position
 * ends an identifier.
 *
 * Separators are: every byte up to and including '/', the ASCII punctuation
 * ranges ':'..'@', '['..'^' and '{'..'~', the backtick, and the line/paragraph
 * separators "\u2028" / "\u2029". The end of input also counts as a
 * separator.
 *
 * Both separator comparisons are made against the offset copy of the range;
 * the original compared the second one against the un-offset `range`, which
 * gave wrong answers for any non-zero `offset`. The original also read
 * `r.front` without checking that the offset position was still inside the
 * input.
 *
 * Params:
 *     offset = number of bytes to look ahead from the current position
 */
bool isSeparating(size_t offset) const pure nothrow @safe
{
    auto r = range.save();
    r.popFrontN(offset);
    if (r.empty) return true;
    auto c = r.front;
    if (c <= 0x2f) return true;
    if (c >= ':' && c <= '@') return true;
    if (c >= '[' && c <= '^') return true;
    if (c >= '{' && c <= '~') return true;
    if (c == '`') return true;
    if (c & 0x80)
    {
        auto ahead = r.peek(3);
        if (ahead == "\u2028" || ahead == "\u2029") return true;
    }
    return false;
}
/// Code mixed in at the top of each lexing function: records the byte index,
/// column and line at which the token starts, plus a range mark that is later
/// passed to `range.slice` to obtain the token text.
enum tokenStart = q{
size_t index = range.index;
size_t column = range.column;
size_t line = range.line;
auto mark = range.mark();
};
/// Error reporting hook called by the lexing functions. The body is empty, so
/// reported errors are currently discarded.
void error(...) pure nothrow @safe {
}
/// Warning reporting hook (called by lexFloatSuffix). The body is empty, so
/// warnings are currently discarded.
void warning(...) pure nothrow @safe {
}
/// Cache through which token text is stored (see the cacheGet calls above);
/// supplied by the caller of the constructor.
StringCache* cache;
/// Active lexer options; lexTokenString temporarily overrides them.
LexerConfig config;
}
/**
 * Creates a DLexer over `range` with a default-initialized LexerConfig and a
 * newly allocated StringCache of the default bucket count.
 */
public auto byToken(ubyte[] range)
{
    return DLexer(range, LexerConfig.init,
        new StringCache(StringCache.defaultBucketCount));
}

/**
 * Creates a DLexer over `range` with a default-initialized LexerConfig, using
 * the caller's string cache.
 */
public auto byToken(ubyte[] range, StringCache* cache)
{
    return DLexer(range, LexerConfig.init, cache);
}

/**
 * Creates a DLexer over `range` with the given configuration and string
 * cache.
 */
public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
{
    auto lexer = DLexer(range, config, cache);
    return lexer;
}
// Smoke test: with the default configuration (whitespace skipped), lexing
// " import std.stdio;" yields exactly import, identifier, '.', identifier, ';'.
unittest
{
import std.stdio;
auto source = cast(ubyte[]) q{ import std.stdio;}c;
auto tokens = byToken(source);
assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", tok!".",
tok!"identifier", tok!";"]));
}