// Written in the D programming language

/**
 * This module contains a range-based _lexer for the D programming language.
 *
 * For performance reasons the _lexer contained in this module operates only on
 * ASCII and UTF-8 encoded source code. If the use of other encodings is
 * desired, the source code must be converted to UTF-8 before passing it to this
 * _lexer.
 *
 * To use the _lexer, create a LexerConfig struct
 * ---
 * LexerConfig config;
 * config.iterStyle = IterationStyle.everything;
 * config.tokenStyle = TokenStyle.source;
 * config.versionNumber = 2061;
 * config.vendorString = "Lexer Example";
 * ---
 * Once you have configured the _lexer, call byToken$(LPAREN)$(RPAREN) on your
 * source code, passing in the configuration.
 * ---
 * auto source = "import std.stdio;"c;
 * auto tokens = byToken(source, config);
 * ---
 * The result of byToken$(LPAREN)$(RPAREN) is a forward range of tokens that can
 * be used easily with the algorithms from std.algorithm or iterated over with
 * $(D_KEYWORD foreach)
 * ---
 * assert (tokens.front.type == TokenType.import_);
 * assert (tokens.front.value == "import");
 * assert (tokens.front.line == 1);
 * assert (tokens.front.startIndex == 0);
 * ---
 *
 * Examples:
 *
 * Generate HTML markup of D code.
 * ---
 * module highlighter;
 *
 * import std.stdio;
 * import std.array;
 * import std.d.lexer;
 *
 * void writeSpan(string cssClass, string value)
 * {
 *     stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
 * }
 *
 *
 * // http://ethanschoonover.com/solarized
 * void highlight(R)(R tokens)
 * {
 *     stdout.writeln(q"[<!DOCTYPE html>
 * <html>
 * <head>
 * <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
 * <body>
 * <style type="text/css">
 * html { background-color: #fdf6e3; color: #002b36; }
 * .kwrd { color: #b58900; font-weight: bold; }
 * .com { color: #93a1a1; font-style: italic; }
 * .num { color: #dc322f; font-weight: bold; }
 * .str { color: #2aa198; font-style: italic; }
 * .op { color: #586e75; font-weight: bold; }
 * .type { color: #268bd2; font-weight: bold; }
 * .cons { color: #859900; font-weight: bold; }
 * </style>
 * <pre>]");
 *
 *     foreach (Token t; tokens)
 *     {
 *         if (isType(t.type))
 *             writeSpan("type", t.value);
 *         else if (isKeyword(t.type))
 *             writeSpan("kwrd", t.value);
 *         else if (t.type == TokenType.comment)
 *             writeSpan("com", t.value);
 *         else if (isStringLiteral(t.type))
 *             writeSpan("str", t.value);
 *         else if (isNumberLiteral(t.type))
 *             writeSpan("num", t.value);
 *         else if (isOperator(t.type))
 *             writeSpan("op", t.value);
 *         else
 *             stdout.write(t.value.replace("<", "&lt;"));
 *     }
 *     stdout.writeln("</pre>\n</body></html>");
 * }
 *
 * void main(string[] args)
 * {
 *     LexerConfig config;
 *     config.tokenStyle = TokenStyle.source;
 *     config.iterStyle = IterationStyle.everything;
 *     config.fileName = args[1];
 *     auto f = File(args[1]);
 *     (cast(ubyte[]) f.byLine(KeepTerminator.yes).join()).byToken(config).highlight();
 * }
 * ---
 *
 * Copyright: Brian Schott 2013
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Brian Schott
 * Source: $(PHOBOSSRC std/d/_lexer.d)
 */

module std.d.lexer;

import std.algorithm;
import std.ascii;
import std.conv;
import std.d.entities;
import std.datetime;
import std.exception;
import std.range;
import std.string;
import std.traits;
import std.uni;
import std.utf;
import std.regex;

import std.stdio;

public:

/**
 * Represents a D token
 */
struct Token
{
    /**
     * The token type.
     */
    TokenType type;

    /**
     * The representation of the token in the original source code.
     */
    string value;

    /**
     * The number of the line the token is on.
     */
    uint line;

    /**
     * The column number of the start of the token in the original source.
     * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
     */
    uint column;

    /**
     * The index of the start of the token in the original source.
     * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
     */
    uint startIndex;

    /**
     * Checks to see if the token is of the same type and has the same string
     * representation as the given token.
     */
    bool opEquals(ref const(Token) other) const
    {
        return other.type == type && other.value == value;
    }

    /**
     * Checks to see if the token's string representation is equal to the given
     * string.
     */
    bool opEquals(string value) const { return this.value == value; }

    /**
     * Checks to see if the token is of the given type.
     */
    bool opEquals(TokenType type) const { return this.type == type; }

    /**
     * Comparison operator orders tokens by start index.
     */
    int opCmp(size_t i) const
    {
        if (startIndex < i) return -1;
        if (startIndex > i) return 1;
        return 0;
    }
}

/**
 * Configure the behavior of the byToken() function. These flags may be
 * combined using a bitwise or.
 */
enum IterationStyle
{
    /// Only include code, not whitespace or comments
    codeOnly = 0,
    /// Includes comments
    includeComments = 0b0001,
    /// Includes whitespace
    includeWhitespace = 0b0010,
    /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
    includeSpecialTokens = 0b0100,
    /// Do not stop iteration on reaching the ___EOF__ token
    ignoreEOF = 0b1000,
    /// Include everything
    everything = includeComments | includeWhitespace | ignoreEOF
}

/**
 * Configuration of the token lexing style. These flags may be combined with a
 * bitwise or.
 */
enum TokenStyle : uint
{
    /**
     * Escape sequences will be replaced with their equivalent characters,
     * enclosing quote characters will not be included. Special tokens such as
     * __VENDOR__ will be replaced with their equivalent strings. Useful for
     * creating a compiler or interpreter.
     */
    default_ = 0b0000,

    /**
     * Escape sequences will not be processed. An escaped quote character will
     * not terminate string lexing, but it will not be replaced with the quote
     * character in the token.
     */
    notEscaped = 0b0001,

    /**
     * Strings will include their opening and closing quote characters as well
     * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
     * include the $(D_STRING 'w') character as well as the opening and closing
     * quotes$(RPAREN)
     */
    includeQuotes = 0b0010,

    /**
     * Do not replace the value field of the special tokens such as ___DATE__
     * with their string equivalents.
     */
    doNotReplaceSpecial = 0b0100,

    /**
     * Strings will be read exactly as they appeared in the source, including
     * their opening and closing quote characters. Useful for syntax
     * highlighting.
     */
    source = notEscaped | includeQuotes | doNotReplaceSpecial
}
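
// An illustrative contrast (a sketch based on the documentation above, not an
// additional API): for the source text "a\nb" the two TokenStyle extremes are
// expected to produce different Token.value contents.
// ---
// LexerConfig cfg;
// cfg.tokenStyle = TokenStyle.default_; // value is a, a newline, then b; no quotes
// cfg.tokenStyle = TokenStyle.source;   // value is "a\nb" exactly as written, quotes kept
// ---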

/**
 * Lexer configuration
 */
struct LexerConfig
{
    /**
     * Iteration style
     */
    IterationStyle iterStyle = IterationStyle.codeOnly;

    /**
     * Token style
     */
    TokenStyle tokenStyle = TokenStyle.default_;

    /**
     * Replacement for the ___VERSION__ token. Defaults to 1.
     */
    uint versionNumber = 1;

    /**
     * Replacement for the ___VENDOR__ token. Defaults to $(D_STRING "std.d.lexer")
     */
    string vendorString = "std.d.lexer";

    /**
     * Name used when creating error messages that are sent to errorFunc. This
     * is needed because the lexer operates on any forward range of ASCII
     * characters or UTF-8 code units and does not know what to call its input
     * source. Defaults to the empty string.
     */
    string fileName = "";

    /**
     * This function is called when an error is encountered during lexing.
     * Parameters are file name, code unit index, line number, column,
     * and error message.
     */
    void delegate(string, uint, uint, uint, string) errorFunc;

    /**
     * Initial size of the lexer's internal token buffer in bytes. The lexer
     * will grow this buffer if necessary.
     */
    size_t bufferSize = 1024 * 4;
}
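
// A minimal configuration sketch (illustrative only; "example.d" is a made-up
// file name): the errorFunc delegate mirrors the documented parameter order of
// file name, code unit index, line, column, and message.
// ---
// LexerConfig config;
// config.fileName = "example.d";
// config.errorFunc = (string file, uint index, uint line, uint col, string msg)
// {
//     stderr.writefln("%s(%d:%d)[%d]: %s", file, line, col, index, msg);
// };
// ---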

/**
 * Iterate over the given range of characters by D tokens.
 * Params:
 *     range = the range of characters
 *     config = the lexer configuration
 * Returns:
 *     an input range of tokens
 */
TokenRange!(R) byToken(R)(R range, LexerConfig config) if (isForwardRange!(R))
{
    auto r = TokenRange!(R)(range);
    r.config = config;
    r.lineNumber = 1;
    r.popFront();
    return r;
}
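
// Usage sketch (reusing the input from the module documentation): the returned
// range of Tokens works directly with std.algorithm, e.g. counting keywords.
// ---
// LexerConfig config;
// auto tokens = byToken("import std.stdio;"c, config);
// auto keywordCount = tokens.count!(t => isKeyword(t.type))();
// ---
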
/**
|
|
* Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
|
|
*/
|
|
struct TokenRange(R) if (isForwardRange!(R))
|
|
{
|
|
/**
|
|
* Returns: true if the range is empty
|
|
*/
|
|
bool empty() const @property
|
|
{
|
|
return _empty;
|
|
}
|
|
|
|
/**
|
|
* Returns: the current token
|
|
*/
|
|
Token front() const @property
|
|
{
|
|
enforce(!_empty, "Cannot call front() on empty token range");
|
|
return current;
|
|
}
|
|
|
|
/**
|
|
* Returns the current token and then removes it from the range
|
|
*/
|
|
Token moveFront()
|
|
{
|
|
auto r = front();
|
|
popFront();
|
|
return r;
|
|
}
|
|
|
|
/**
|
|
* Range operation
|
|
*/
|
|
int opApply(int delegate(Token) dg)
|
|
{
|
|
int result = 0;
|
|
while (!empty)
|
|
{
|
|
result = dg(front);
|
|
if (result)
|
|
break;
|
|
popFront();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Range operation
|
|
*/
|
|
int opApply(int delegate(size_t, Token) dg)
|
|
{
|
|
int result = 0;
|
|
int i = 0;
|
|
while (!empty)
|
|
{
|
|
result = dg(i, front);
|
|
if (result)
|
|
break;
|
|
popFront();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Removes the current token from the range
|
|
*/
|
|
void popFront()
|
|
{
|
|
// Filter out tokens we don't care about
|
|
loop: do
|
|
{
|
|
advance();
|
|
switch (current.type)
|
|
{
|
|
case TokenType.whitespace:
|
|
if (config.iterStyle & IterationStyle.includeWhitespace)
|
|
break loop;
|
|
break;
|
|
case TokenType.comment:
|
|
if (config.iterStyle & IterationStyle.includeComments)
|
|
break loop;
|
|
break;
|
|
case TokenType.specialTokenSequence:
|
|
if (config.iterStyle & IterationStyle.includeSpecialTokens)
|
|
break loop;
|
|
break;
|
|
default:
|
|
break loop;
|
|
}
|
|
}
|
|
while (!empty());
|
|
}
|
|
|
|
private:
|
|
|
|
this(ref R range)
|
|
{
|
|
this.range = range;
|
|
buffer = new ubyte[config.bufferSize];
|
|
}
|
|
|
|
/*
|
|
* Advances the range to the next token
|
|
*/
|
|
void advance()
|
|
{
|
|
if (range.empty)
|
|
{
|
|
_empty = true;
|
|
return;
|
|
}
|
|
|
|
bufferIndex = 0;
|
|
current.line = lineNumber;
|
|
current.startIndex = index;
|
|
current.column = column;
|
|
current.value = null;
|
|
|
|
if (std.ascii.isWhite(range.front))
|
|
{
|
|
lexWhitespace();
|
|
return;
|
|
}
|
|
outer: switch (range.front)
|
|
{
|
|
mixin(generateCaseTrie(
|
|
"=", "TokenType.assign",
|
|
"@", "TokenType.at",
|
|
"&", "TokenType.bitAnd",
|
|
"&=", "TokenType.bitAndEquals",
|
|
"|", "TokenType.bitOr",
|
|
"|=", "TokenType.bitOrEquals",
|
|
"~=", "TokenType.catEquals",
|
|
":", "TokenType.colon",
|
|
",", "TokenType.comma",
|
|
"--", "TokenType.decrement",
|
|
"$", "TokenType.dollar",
|
|
"==", "TokenType.equals",
|
|
"=>", "TokenType.goesTo",
|
|
">", "TokenType.greater",
|
|
">=", "TokenType.greaterEqual",
|
|
"++", "TokenType.increment",
|
|
"{", "TokenType.lBrace",
|
|
"[", "TokenType.lBracket",
|
|
"<", "TokenType.less",
|
|
"<=", "TokenType.lessEqual",
|
|
"<>=", "TokenType.lessEqualGreater",
|
|
"<>", "TokenType.lessOrGreater",
|
|
"&&", "TokenType.logicAnd",
|
|
"||", "TokenType.logicOr",
|
|
"(", "TokenType.lParen",
|
|
"-", "TokenType.minus",
|
|
"-=", "TokenType.minusEquals",
|
|
"%", "TokenType.mod",
|
|
"%=", "TokenType.modEquals",
|
|
"*=", "TokenType.mulEquals",
|
|
"!", "TokenType.not",
|
|
"!=", "TokenType.notEquals",
|
|
"!>", "TokenType.notGreater",
|
|
"!>=", "TokenType.notGreaterEqual",
|
|
"!<", "TokenType.notLess",
|
|
"!<=", "TokenType.notLessEqual",
|
|
"!<>", "TokenType.notLessEqualGreater",
|
|
"+", "TokenType.plus",
|
|
"+=", "TokenType.plusEquals",
|
|
"^^", "TokenType.pow",
|
|
"^^=", "TokenType.powEquals",
|
|
"}", "TokenType.rBrace",
|
|
"]", "TokenType.rBracket",
|
|
")", "TokenType.rParen",
|
|
";", "TokenType.semicolon",
|
|
"<<", "TokenType.shiftLeft",
|
|
"<<=", "TokenType.shiftLeftEqual",
|
|
">>", "TokenType.shiftRight",
|
|
">>=", "TokenType.shiftRightEqual",
|
|
"*", "TokenType.star",
|
|
"?", "TokenType.ternary",
|
|
"~", "TokenType.tilde",
|
|
"!<>=", "TokenType.unordered",
|
|
">>>", "TokenType.unsignedShiftRight",
|
|
">>>=", "TokenType.unsignedShiftRightEqual",
|
|
"^", "TokenType.xor",
|
|
"^=", "TokenType.xorEquals",
|
|
));
|
|
case '/':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (r.isEoF())
|
|
{
|
|
current.type = TokenType.div;
|
|
current.value = "/";
|
|
range.popFront();
|
|
++index;
|
|
break;
|
|
}
|
|
switch (r.front)
|
|
{
|
|
case '/':
|
|
case '*':
|
|
case '+':
|
|
lexComment();
|
|
break outer;
|
|
case '=':
|
|
current.type = TokenType.divEquals;
|
|
current.value = "/=";
|
|
range.popFront();
|
|
range.popFront();
|
|
index += 2;
|
|
break outer;
|
|
default:
|
|
current.type = TokenType.div;
|
|
current.value = "/";
|
|
++index;
|
|
range.popFront();
|
|
break outer;
|
|
}
|
|
case '.':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (r.isEoF())
|
|
{
|
|
current.type = TokenType.dot;
|
|
current.value = getTokenValue(TokenType.dot);
|
|
range.popFront();
|
|
++index;
|
|
break outer;
|
|
}
|
|
else if (r.front >= '0' && r.front <= '9')
|
|
{
|
|
lexNumber();
|
|
break outer;
|
|
}
|
|
else if (r.front == '.')
|
|
{
|
|
current.type = TokenType.slice;
|
|
r.popFront();
|
|
if (r.front == '.')
|
|
{
|
|
current.type = TokenType.vararg;
|
|
range.popFront();
|
|
range.popFront();
|
|
range.popFront();
|
|
index += 3;
|
|
}
|
|
else
|
|
{
|
|
|
|
range.popFront();
|
|
range.popFront();
|
|
index += 2;
|
|
}
|
|
current.value = getTokenValue(current.type);
|
|
}
|
|
else
|
|
{
|
|
range.popFront();
|
|
current.type = TokenType.dot;
|
|
current.value = getTokenValue(TokenType.dot);
|
|
}
|
|
break;
|
|
case '0': .. case '9':
|
|
lexNumber();
|
|
break;
|
|
case '\'':
|
|
case '"':
|
|
case '`':
|
|
lexString();
|
|
break;
|
|
case 'q':
|
|
auto r = range.save;
|
|
r.popFront();
|
|
if (!r.isEoF() && r.front == '{')
|
|
{
|
|
lexTokenString();
|
|
break;
|
|
}
|
|
else if (!r.isEoF() && r.front == '"')
|
|
{
|
|
lexDelimitedString();
|
|
break;
|
|
}
|
|
else
|
|
goto default;
|
|
case 'r':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (!r.isEoF() && r.front == '"')
|
|
{
|
|
lexString();
|
|
break;
|
|
}
|
|
else
|
|
goto default;
|
|
case 'x':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (!r.isEoF() && r.front == '"')
|
|
{
|
|
lexHexString();
|
|
break;
|
|
}
|
|
else
|
|
goto default;
|
|
case '#':
|
|
lexSpecialTokenSequence();
|
|
break;
|
|
default:
|
|
while(!range.isEoF() && !isSeparating(range.front))
|
|
{
|
|
keepChar();
|
|
}
|
|
|
|
current.type = lookupTokenType(cast(char[]) buffer[0 .. bufferIndex]);
|
|
current.value = getTokenValue(current.type);
|
|
if (current.value is null)
|
|
current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
|
|
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
|
|
{
|
|
_empty = true;
|
|
return;
|
|
}
|
|
|
|
if (config.tokenStyle & TokenStyle.doNotReplaceSpecial)
|
|
break;
|
|
|
|
switch (current.type)
|
|
{
|
|
case TokenType.date:
|
|
current.type = TokenType.stringLiteral;
|
|
auto time = Clock.currTime();
|
|
current.value = format("%s %02d %04d", time.month, time.day, time.year);
|
|
break;
|
|
case TokenType.time:
|
|
auto time = Clock.currTime();
|
|
current.type = TokenType.stringLiteral;
|
|
current.value = (cast(TimeOfDay)(time)).toISOExtString();
|
|
break;
|
|
case TokenType.timestamp:
|
|
auto time = Clock.currTime();
|
|
auto dt = cast(DateTime) time;
|
|
current.type = TokenType.stringLiteral;
|
|
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
|
|
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
|
|
dt.second, dt.year);
|
|
break;
|
|
case TokenType.vendor:
|
|
current.type = TokenType.stringLiteral;
|
|
current.value = config.vendorString;
|
|
break;
|
|
case TokenType.compilerVersion:
|
|
current.type = TokenType.stringLiteral;
|
|
current.value = format("%d", config.versionNumber);
|
|
break;
|
|
case TokenType.line:
|
|
current.type = TokenType.intLiteral;
|
|
current.value = format("%d", current.line);
|
|
break;
|
|
case TokenType.file:
|
|
current.type = TokenType.stringLiteral;
|
|
current.value = config.fileName;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
void lexWhitespace()
|
|
{
|
|
current.type = TokenType.whitespace;
|
|
while (!isEoF(range) && std.ascii.isWhite(range.front))
|
|
{
|
|
keepChar();
|
|
}
|
|
if (config.iterStyle & IterationStyle.includeWhitespace)
|
|
current.value = (cast(char[]) buffer[0..bufferIndex]).idup;
|
|
}
|
|
|
|
void lexComment()
|
|
in
|
|
{
|
|
assert (range.front == '/');
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.comment;
|
|
keepChar();
|
|
switch(range.front)
|
|
{
|
|
case '/':
|
|
while (!isEoF(range) && !isNewline(range))
|
|
{
|
|
keepChar();
|
|
}
|
|
break;
|
|
case '*':
|
|
while (!isEoF(range))
|
|
{
|
|
if (range.front == '*')
|
|
{
|
|
keepChar();
|
|
if (range.front == '/')
|
|
{
|
|
keepChar();
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
break;
|
|
case '+':
|
|
int depth = 1;
|
|
while (depth > 0 && !isEoF(range))
|
|
{
|
|
if (range.front == '+')
|
|
{
|
|
keepChar();
|
|
if (range.front == '/')
|
|
{
|
|
keepChar();
|
|
--depth;
|
|
}
|
|
}
|
|
else if (range.front == '/')
|
|
{
|
|
keepChar();
|
|
if (range.front == '+')
|
|
{
|
|
keepChar();
|
|
++depth;
|
|
}
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
break;
|
|
default:
|
|
assert(false);
|
|
}
|
|
if (config.iterStyle & IterationStyle.includeComments)
|
|
current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
}
|
|
|
|
void lexHexString()
|
|
in
|
|
{
|
|
assert (range.front == 'x');
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
size_t i;
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
{
|
|
buffer[i++] = 'x';
|
|
buffer[i++] = '"';
|
|
}
|
|
range.popFront();
|
|
range.popFront();
|
|
index += 2;
|
|
while (!range.isEoF())
|
|
{
|
|
if (i >= buffer.length)
|
|
{
|
|
errorMessage("Hex string constant exceeded buffer size");
|
|
return;
|
|
}
|
|
else if (isHexDigit(range.front))
|
|
{
|
|
keepChar();
|
|
}
|
|
else if (std.ascii.isWhite(range.front) && (config.tokenStyle & TokenStyle.notEscaped))
|
|
{
|
|
keepChar();
|
|
}
|
|
else if (range.front == '"')
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
buffer[i++] = '"';
|
|
range.popFront();
|
|
++index;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
errorMessage(format("Invalid character '%s' in hex string literal",
|
|
cast(char) range.front));
|
|
}
|
|
}
|
|
if (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case 'w':
|
|
current.type = TokenType.wstringLiteral;
|
|
goto case 'c';
|
|
case 'd':
|
|
current.type = TokenType.dstringLiteral;
|
|
goto case 'c';
|
|
case 'c':
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
buffer[i++] = range.front;
|
|
range.popFront();
|
|
++index;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (config.tokenStyle & TokenStyle.notEscaped)
|
|
current.value = (cast(char[]) buffer[0 .. i]).idup;
|
|
else
|
|
{
|
|
auto a = appender!(ubyte[])();
|
|
foreach (b; std.range.chunks(buffer[0 .. i], 2))
|
|
{
|
|
string s = to!string(cast(char[]) b);
|
|
a.put(cast(ubyte[]) to!string(cast(dchar) parse!uint(s, 16)));
|
|
}
|
|
current.value = to!string(cast(char[]) a.data);
|
|
}
|
|
}
|
|
|
|
void lexNumber()
|
|
in
|
|
{
|
|
assert(isDigit(cast(char) range.front) || range.front == '.');
|
|
}
|
|
body
|
|
{
|
|
// hex and binary can start with zero, anything else is decimal
|
|
if (range.front != '0')
|
|
lexDecimal();
|
|
else
|
|
{
|
|
auto r = range.save();
|
|
r.popFront();
|
|
switch (r.front)
|
|
{
|
|
case 'x':
|
|
case 'X':
|
|
keepChar();
|
|
keepChar();
|
|
lexHex();
|
|
break;
|
|
case 'b':
|
|
case 'B':
|
|
keepChar();
|
|
keepChar();
|
|
lexBinary();
|
|
break;
|
|
default:
|
|
lexDecimal();
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexFloatSuffix()
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case 'L':
|
|
keepChar();
|
|
current.type = TokenType.doubleLiteral;
|
|
break;
|
|
case 'f':
|
|
case 'F':
|
|
keepChar();
|
|
current.type = TokenType.floatLiteral;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (!range.isEoF() && range.front == 'i')
|
|
{
|
|
keepChar();
|
|
if (current.type == TokenType.floatLiteral)
|
|
current.type = TokenType.ifloatLiteral;
|
|
else
|
|
current.type = TokenType.idoubleLiteral;
|
|
}
|
|
}
|
|
|
|
void lexIntSuffix()
|
|
{
|
|
bool foundU;
|
|
bool foundL;
|
|
while (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case 'u':
|
|
case 'U':
|
|
if (foundU)
|
|
return;
|
|
switch (current.type)
|
|
{
|
|
case TokenType.intLiteral:
|
|
current.type = TokenType.uintLiteral;
|
|
keepChar();
|
|
break;
|
|
case TokenType.longLiteral:
|
|
current.type = TokenType.ulongLiteral;
|
|
keepChar();
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
foundU = true;
|
|
break;
|
|
case 'L':
|
|
if (foundL)
|
|
return;
|
|
switch (current.type)
|
|
{
|
|
case TokenType.intLiteral:
|
|
current.type = TokenType.longLiteral;
|
|
keepChar();
|
|
break;
|
|
case TokenType.uintLiteral:
|
|
current.type = TokenType.ulongLiteral;
|
|
keepChar();
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
foundL = true;
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexExponent()
|
|
in
|
|
{
|
|
assert (range.front == 'e' || range.front == 'E' || range.front == 'p'
|
|
|| range.front == 'P');
|
|
}
|
|
body
|
|
{
|
|
keepChar();
|
|
bool foundSign = false;
|
|
while (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case '-':
|
|
case '+':
|
|
if (foundSign)
|
|
return;
|
|
foundSign = true;
|
|
keepChar();
|
|
case '0': .. case '9':
|
|
case '_':
|
|
keepChar();
|
|
break;
|
|
case 'L':
|
|
case 'f':
|
|
case 'F':
|
|
case 'i':
|
|
lexFloatSuffix();
|
|
return;
|
|
default:
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexDecimal()
|
|
in
|
|
{
|
|
assert ((range.front >= '0' && range.front <= '9') || range.front == '.');
|
|
}
|
|
body
|
|
{
|
|
bool foundDot = false;
|
|
current.type = TokenType.intLiteral;
|
|
scope(exit) current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
decimalLoop: while (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case '0': .. case '9':
|
|
case '_':
|
|
keepChar();
|
|
break;
|
|
case 'i':
|
|
case 'L':
|
|
if (foundDot)
|
|
{
|
|
lexFloatSuffix();
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
lexIntSuffix();
|
|
return;
|
|
}
|
|
case 'f':
|
|
case 'F':
|
|
lexFloatSuffix();
|
|
return;
|
|
case 'e':
|
|
case 'E':
|
|
lexExponent();
|
|
return;
|
|
case '.':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (!r.isEoF() && r.front == '.')
|
|
break decimalLoop; // possibly slice expression
|
|
if (foundDot)
|
|
break decimalLoop; // two dots with other characters between them
|
|
keepChar();
|
|
foundDot = true;
|
|
current.type = TokenType.doubleLiteral;
|
|
break;
|
|
default:
|
|
break decimalLoop;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
void lexBinary()
|
|
{
|
|
current.type = TokenType.intLiteral;
|
|
scope(exit) current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
binaryLoop: while (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case '0':
|
|
case '1':
|
|
case '_':
|
|
keepChar();
|
|
break;
|
|
case 'u':
|
|
case 'U':
|
|
case 'L':
|
|
lexIntSuffix();
|
|
return;
|
|
default:
|
|
break binaryLoop;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexHex()
|
|
{
|
|
current.type = TokenType.intLiteral;
|
|
scope(exit) current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
bool foundDot;
|
|
hexLoop: while (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case 'a': .. case 'f':
|
|
case 'A': .. case 'F':
|
|
case '0': .. case '9':
|
|
case '_':
|
|
keepChar();
|
|
break;
|
|
case 'i':
|
|
case 'L':
|
|
if (foundDot)
|
|
{
|
|
lexFloatSuffix();
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
lexIntSuffix();
|
|
return;
|
|
}
|
|
case 'p':
|
|
case 'P':
|
|
lexExponent();
|
|
return;
|
|
case '.':
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (!r.isEoF() && r.front == '.')
|
|
break hexLoop; // slice expression
|
|
if (foundDot)
|
|
break hexLoop; // two dots with other characters between them
|
|
keepChar();
|
|
foundDot = true;
|
|
current.type = TokenType.doubleLiteral;
|
|
break;
|
|
default:
|
|
break hexLoop;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexStringSuffix()
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
if (!range.isEoF())
|
|
{
|
|
switch (range.front)
|
|
{
|
|
case 'w':
|
|
current.type = TokenType.wstringLiteral;
|
|
goto case 'c';
|
|
case 'd':
|
|
current.type = TokenType.dstringLiteral;
|
|
goto case 'c';
|
|
case 'c':
|
|
keepChar();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void lexString()
|
|
in
|
|
{
|
|
assert (range.front == '\'' || range.front == '"' || range.front == '`' || range.front == 'r');
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
bool isWysiwyg = range.front == 'r' || range.front == '`';
|
|
if (range.front == 'r')
|
|
keepChar();
|
|
|
|
scope (exit)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
current.value = (cast(char[]) buffer[0..bufferIndex]).idup;
|
|
else
|
|
{
|
|
if (buffer[0] == 'r')
|
|
current.value = (cast(char[]) buffer[2..bufferIndex - 1]).idup;
|
|
else
|
|
current.value = (cast(char[]) buffer[1..bufferIndex - 1]).idup;
|
|
}
|
|
}
|
|
|
|
auto quote = range.front;
|
|
keepChar();
|
|
while (true)
|
|
{
|
|
if (range.isEoF())
|
|
{
|
|
errorMessage("Unterminated string literal");
|
|
return;
|
|
}
|
|
else if (range.front == '\\' && !isWysiwyg)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.notEscaped)
|
|
{
|
|
auto r = range.save();
|
|
r.popFront();
|
|
if (r.front == quote && !isWysiwyg)
|
|
{
|
|
keepChar();
|
|
keepChar();
|
|
}
|
|
else if (r.front == '\\' && !isWysiwyg)
|
|
{
|
|
keepChar();
|
|
keepChar();
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
else
|
|
interpretEscapeSequence(range, index, buffer, bufferIndex);
|
|
}
|
|
else if (range.front == quote)
|
|
{
|
|
keepChar();
|
|
break;
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
lexStringSuffix();
|
|
}
|
|
|
|
void lexDelimitedString()
|
|
in
|
|
{
|
|
assert(range.front == 'q');
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
|
|
keepChar();
|
|
keepChar();
|
|
|
|
bool heredoc;
|
|
ubyte open;
|
|
ubyte close;
|
|
|
|
switch (range.front)
|
|
{
|
|
case '[': open = '['; close = ']'; break;
|
|
case '{': open = '{'; close = '}'; break;
|
|
case '(': open = '('; close = ')'; break;
|
|
case '<': open = '<'; close = '>'; break;
|
|
default: heredoc = true; break;
|
|
}
|
|
if (heredoc)
|
|
lexHeredocString();
|
|
else
|
|
lexNormalDelimitedString(open, close);
|
|
}
|
|
|
|
void lexNormalDelimitedString(ubyte open, ubyte close)
|
|
in
|
|
{
|
|
assert(buffer[0 .. bufferIndex] == "q\"");
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
int depth = 1;
|
|
keepChar();
|
|
scope (exit)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
else
|
|
current.value = (cast(char[]) buffer[3 .. bufferIndex - 2]).idup;
|
|
}
|
|
while (true)
|
|
{
|
|
if (range.isEoF())
|
|
errorMessage("Unterminated string literal");
|
|
if (range.front == open)
|
|
{
|
|
keepChar();
|
|
++depth;
|
|
}
|
|
else if (range.front == close)
|
|
{
|
|
keepChar();
|
|
--depth;
|
|
if (depth <= 0)
|
|
{
|
|
auto r = range.save();
|
|
if (r.front == '"')
|
|
{
|
|
keepChar();
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
errorMessage("Expected \" after balanced "
|
|
~ cast(char) close ~ " but found "
|
|
~ cast(char) r.front ~ " instead.");
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
|
|
}
|
|
|
|
void lexHeredocString()
|
|
in
|
|
{
|
|
assert (buffer[0 .. bufferIndex] == "q\"");
|
|
}
|
|
body
|
|
{
|
|
auto i = bufferIndex;
|
|
while (true)
|
|
{
|
|
if (range.isEoF())
|
|
{
|
|
errorMessage("Unterminated string literal");
|
|
return;
|
|
}
|
|
else if (isNewline(range))
|
|
{
|
|
keepChar();
|
|
break;
|
|
}
|
|
else if (isSeparating(range.front))
|
|
{
|
|
errorMessage("Unterminated string literal - Separating");
|
|
return;
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
auto ident = buffer[i .. bufferIndex - 1];
|
|
|
|
scope(exit)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
else
|
|
{
|
|
size_t b = 2 + ident.length;
|
|
if (buffer[b] == '\r') ++b;
|
|
if (buffer[b] == '\n') ++b;
|
|
size_t e = bufferIndex;
|
|
if (buffer[e - 1] == 'c' || buffer[e - 1] == 'd' || buffer[e - 1] == 'w')
|
|
--e;
|
|
|
|
current.value = (cast(char[]) buffer[b .. e]).idup;
|
|
}
|
|
}
|
|
|
|
while (true)
|
|
{
|
|
if (range.isEoF())
|
|
{
|
|
errorMessage("Unterminated string literal -- a");
|
|
return;
|
|
}
|
|
else if (buffer[bufferIndex - ident.length .. bufferIndex] == ident)
|
|
{
|
|
if (range.front == '"')
|
|
{
|
|
keepChar();
|
|
lexStringSuffix();
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
errorMessage("Unterminated string literal -- b");
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
keepChar();
|
|
}
|
|
}
|
|
|
|
void lexTokenString()
|
|
in
|
|
{
|
|
assert (range.front == 'q');
|
|
}
|
|
body
|
|
{
|
|
current.type = TokenType.stringLiteral;
|
|
size_t i;
|
|
|
|
scope (exit)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
current.value = (cast(char[]) buffer[0 .. bufferIndex]).idup;
|
|
else
|
|
current.value = (cast(char[]) buffer[2 .. bufferIndex - 1]).idup;
|
|
}
|
|
|
|
keepChar();
|
|
keepChar();
|
|
|
|
LexerConfig c;
|
|
c.iterStyle = IterationStyle.everything;
|
|
c.tokenStyle = TokenStyle.source;
|
|
|
|
auto r = byToken(range, c);
|
|
r.index = index;
|
|
int depth = 1;
|
|
while (!r.empty)
|
|
{
|
|
if (r.front.type == TokenType.lBrace)
|
|
{
|
|
++depth;
|
|
}
|
|
else if (r.front.type == TokenType.rBrace)
|
|
{
|
|
--depth;
|
|
if (depth <= 0)
|
|
{
|
|
if (config.tokenStyle & TokenStyle.includeQuotes)
|
|
{
|
|
if (bufferIndex >= buffer.length)
|
|
buffer.length += 1024;
|
|
buffer[bufferIndex++] = '}';
|
|
}
|
|
r.popFront();
|
|
break;
|
|
}
|
|
}
|
|
if (bufferIndex + r.front.value.length > buffer.length)
|
|
buffer.length += 1024;
|
|
buffer[bufferIndex .. bufferIndex + r.front.value.length] = cast(ubyte[]) r.front.value;
|
|
bufferIndex += r.front.value.length;
|
|
r.popFront();
|
|
}
|
|
lexStringSuffix();
|
|
}
|
|
|
|
void lexSpecialTokenSequence()
|
|
in
|
|
{
|
|
assert (range.front == '#');
|
|
}
|
|
body
|
|
{
|
|
keepChar();
|
|
auto r = range.save();
|
|
auto app = appender!(ubyte[])();
|
|
app.put('#');
|
|
while (true)
|
|
{
|
|
if (r.isEoF())
|
|
{
|
|
errorMessage("Found EOF when interpreting special token sequence");
|
|
return;
|
|
}
|
|
else if (isNewline(r))
|
|
break;
|
|
else
|
|
{
|
|
app.put(r.front);
|
|
r.popFront();
|
|
}
|
|
}
|
|
auto m = match((cast(char[]) app.data),
|
|
`#line\s+(?P<line>\d+)\s*(?P<filespec>".+")*?`);
|
|
if (m)
|
|
{
|
|
current.type = TokenType.specialTokenSequence;
|
|
current.value = (cast(char[]) app.data).idup;
|
|
column += app.data.length;
|
|
index += app.data.length;
|
|
range.popFrontN(app.data.length);
|
|
auto c = m.captures;
|
|
if (c["filespec"])
|
|
config.fileName = c["filespec"].idup;
|
|
auto l = c["line"];
|
|
lineNumber = parse!uint(l);
|
|
|
|
}
|
|
else
|
|
{
|
|
current.type = TokenType.hash;
|
|
current.value = getTokenValue(TokenType.hash);
|
|
}
|
|
}
|
|
|
|
void errorMessage(string s)
|
|
{
|
|
import std.stdio;
|
|
if (config.errorFunc !is null)
|
|
config.errorFunc(config.fileName, current.startIndex,
|
|
current.line, current.column, s);
|
|
else
|
|
stderr.writefln("%s(%d:%d): %s", config.fileName, current.line,
|
|
current.column, s);
|
|
}
|
|
|
|
void keepChar()
|
|
{
|
|
if (bufferIndex + 2 >= buffer.length)
|
|
buffer.length += 1024;
|
|
bool foundNewline;
|
|
if (range.front == '\r')
|
|
{
|
|
buffer[bufferIndex++] = range.front;
|
|
range.popFront();
|
|
++index;
|
|
foundNewline = true;
|
|
}
|
|
if (range.front == '\n')
|
|
{
|
|
buffer[bufferIndex++] = range.front;
|
|
range.popFront();
|
|
++index;
|
|
foundNewline = true;
|
|
}
|
|
else
|
|
{
|
|
buffer[bufferIndex++] = range.front;
|
|
range.popFront();
|
|
++index;
|
|
++column;
|
|
}
|
|
if (foundNewline)
|
|
{
|
|
++lineNumber;
|
|
column = 0;
|
|
}
|
|
}
|
|
|
|
Token current;
|
|
uint lineNumber;
|
|
uint index;
|
|
uint column;
|
|
R range;
|
|
bool _empty;
|
|
ubyte[] buffer;
|
|
size_t bufferIndex;
|
|
LexerConfig config;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is an operator
|
|
*/
|
|
pure nothrow bool isOperator(const TokenType t)
|
|
{
|
|
return t >= TokenType.assign && t <= TokenType.xorEquals;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a keyword
|
|
*/
|
|
pure nothrow bool isKeyword(const TokenType t)
|
|
{
|
|
return t >= TokenType.bool_ && t <= TokenType.with_;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a built-in type
|
|
*/
|
|
pure nothrow bool isType(const TokenType t)
|
|
{
|
|
return t >= TokenType.bool_ && t <= TokenType.wstring_;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is an attribute
|
|
*/
|
|
pure nothrow bool isAttribute(const TokenType t)
|
|
{
|
|
return t >= TokenType.align_ && t <= TokenType.static_;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a protection attribute
|
|
*/
|
|
pure nothrow bool isProtection(const TokenType t)
|
|
{
|
|
return t >= TokenType.export_ && t <= TokenType.public_;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a compile-time constant such as ___DATE__
|
|
*/
|
|
pure nothrow bool isConstant(const TokenType t)
|
|
{
|
|
return t >= TokenType.date && t <= TokenType.traits;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a string or number literal
|
|
*/
|
|
pure nothrow bool isLiteral(const TokenType t)
|
|
{
|
|
return t >= TokenType.doubleLiteral && t <= TokenType.wstringLiteral;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a number literal
|
|
*/
|
|
pure nothrow bool isNumberLiteral(const TokenType t)
|
|
{
|
|
return t >= TokenType.doubleLiteral && t <= TokenType.ulongLiteral;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is a string literal
|
|
*/
|
|
pure nothrow bool isStringLiteral(const TokenType t)
|
|
{
|
|
return t >= TokenType.dstringLiteral && t <= TokenType.wstringLiteral;
|
|
}
|
|
|
|
/**
|
|
* Returns: true if the token is whitespace, a comment, a special token
|
|
* sequence, or an identifier
|
|
*/
|
|
pure nothrow bool isMisc(const TokenType t)
|
|
{
|
|
return t >= TokenType.comment && t <= TokenType.specialTokenSequence;
|
|
}
|
|
|
|
/**
|
|
* Listing of all the tokens in the D language.
|
|
*/
|
|
enum TokenType: ushort
|
|
{
|
|
assign, /// =
|
|
at, /// @
|
|
bitAnd, /// &
|
|
bitAndEquals, /// &=
|
|
bitOr, /// |
|
|
bitOrEquals, /// |=
|
|
catEquals, /// ~=
|
|
colon, /// :
|
|
comma, /// ,
|
|
decrement, /// --
|
|
div, /// /
|
|
divEquals, /// /=
|
|
dollar, /// $
|
|
dot, /// .
|
|
equals, /// ==
|
|
goesTo, /// =>
|
|
greater, /// >
|
|
greaterEqual, /// >=
|
|
hash, /// #
|
|
increment, /// ++
|
|
lBrace, /// {
|
|
lBracket, /// [
|
|
less, /// <
|
|
lessEqual, /// <=
|
|
lessEqualGreater, /// <>=
|
|
lessOrGreater, /// <>
|
|
logicAnd, /// &&
|
|
logicOr, /// ||
|
|
lParen, /// $(LPAREN)
|
|
minus, /// -
|
|
minusEquals, /// -=
|
|
mod, /// %
|
|
modEquals, /// %=
|
|
mulEquals, /// *=
|
|
not, /// !
|
|
notEquals, /// !=
|
|
notGreater, /// !>
|
|
notGreaterEqual, /// !>=
|
|
notLess, /// !<
|
|
notLessEqual, /// !<=
|
|
notLessEqualGreater, /// !<>
|
|
plus, /// +
|
|
plusEquals, /// +=
|
|
pow, /// ^^
|
|
powEquals, /// ^^=
|
|
rBrace, /// }
|
|
rBracket, /// ]
|
|
rParen, /// $(RPAREN)
|
|
semicolon, /// ;
|
|
shiftLeft, /// <<
|
|
shiftLeftEqual, /// <<=
|
|
shiftRight, /// >>
|
|
shiftRightEqual, /// >>=
|
|
slice, /// ..
|
|
star, /// *
|
|
ternary, /// ?
|
|
tilde, /// ~
|
|
unordered, /// !<>=
|
|
unsignedShiftRight, /// >>>
|
|
unsignedShiftRightEqual, /// >>>=
|
|
vararg, /// ...
|
|
xor, /// ^
|
|
xorEquals, /// ^=
|
|
|
|
bool_, /// $(D_KEYWORD bool)
|
|
byte_, /// $(D_KEYWORD byte)
|
|
cdouble_, /// $(D_KEYWORD cdouble)
|
|
cent_, /// $(D_KEYWORD cent)
|
|
cfloat_, /// $(D_KEYWORD cfloat)
|
|
char_, /// $(D_KEYWORD char)
|
|
creal_, /// $(D_KEYWORD creal)
|
|
dchar_, /// $(D_KEYWORD dchar)
|
|
double_, /// $(D_KEYWORD double)
|
|
dstring_, /// $(D_KEYWORD dstring)
|
|
float_, /// $(D_KEYWORD float)
|
|
function_, /// $(D_KEYWORD function)
|
|
idouble_, /// $(D_KEYWORD idouble)
|
|
ifloat_, /// $(D_KEYWORD ifloat)
|
|
int_, /// $(D_KEYWORD int)
|
|
ireal_, /// $(D_KEYWORD ireal)
|
|
long_, /// $(D_KEYWORD long)
|
|
real_, /// $(D_KEYWORD real)
|
|
short_, /// $(D_KEYWORD short)
|
|
string_, /// $(D_KEYWORD string)
|
|
ubyte_, /// $(D_KEYWORD ubyte)
|
|
ucent_, /// $(D_KEYWORD ucent)
|
|
uint_, /// $(D_KEYWORD uint)
|
|
ulong_, /// $(D_KEYWORD ulong)
|
|
ushort_, /// $(D_KEYWORD ushort)
|
|
void_, /// $(D_KEYWORD void)
|
|
wchar_, /// $(D_KEYWORD wchar)
|
|
wstring_, /// $(D_KEYWORD wstring)
|
|
|
|
align_, /// $(D_KEYWORD align)
|
|
deprecated_, /// $(D_KEYWORD deprecated)
|
|
extern_, /// $(D_KEYWORD extern)
|
|
pragma_, /// $(D_KEYWORD pragma)
|
|
export_, /// $(D_KEYWORD export)
|
|
package_, /// $(D_KEYWORD package)
|
|
private_, /// $(D_KEYWORD private)
|
|
protected_, /// $(D_KEYWORD protected)
|
|
public_, /// $(D_KEYWORD public)
|
|
abstract_, /// $(D_KEYWORD abstract)
|
|
auto_, /// $(D_KEYWORD auto)
|
|
const_, /// $(D_KEYWORD const)
|
|
final_, /// $(D_KEYWORD final)
|
|
gshared, /// $(D_KEYWORD __gshared)
|
|
immutable_, /// $(D_KEYWORD immutable)
|
|
inout_, /// $(D_KEYWORD inout)
|
|
scope_, /// $(D_KEYWORD scope)
|
|
shared_, /// $(D_KEYWORD shared)
|
|
static_, /// $(D_KEYWORD static)
|
|
|
|
synchronized_, /// $(D_KEYWORD synchronized)
|
|
alias_, /// $(D_KEYWORD alias)
|
|
asm_, /// $(D_KEYWORD asm)
|
|
assert_, /// $(D_KEYWORD assert)
|
|
body_, /// $(D_KEYWORD body)
|
|
break_, /// $(D_KEYWORD break)
|
|
case_, /// $(D_KEYWORD case)
|
|
cast_, /// $(D_KEYWORD cast)
|
|
catch_, /// $(D_KEYWORD catch)
|
|
class_, /// $(D_KEYWORD class)
|
|
continue_, /// $(D_KEYWORD continue)
|
|
debug_, /// $(D_KEYWORD debug)
|
|
default_, /// $(D_KEYWORD default)
|
|
delegate_, /// $(D_KEYWORD delegate)
|
|
delete_, /// $(D_KEYWORD delete)
|
|
do_, /// $(D_KEYWORD do)
|
|
else_, /// $(D_KEYWORD else)
|
|
enum_, /// $(D_KEYWORD enum)
|
|
false_, /// $(D_KEYWORD false)
|
|
finally_, /// $(D_KEYWORD finally)
|
|
foreach_, /// $(D_KEYWORD foreach)
|
|
foreach_reverse_, /// $(D_KEYWORD foreach_reverse)
|
|
for_, /// $(D_KEYWORD for)
|
|
goto_, /// $(D_KEYWORD goto)
|
|
if_, /// $(D_KEYWORD if)
|
|
import_, /// $(D_KEYWORD import)
|
|
in_, /// $(D_KEYWORD in)
|
|
interface_, /// $(D_KEYWORD interface)
|
|
invariant_, /// $(D_KEYWORD invariant)
|
|
is_, /// $(D_KEYWORD is)
|
|
lazy_, /// $(D_KEYWORD lazy)
|
|
macro_, /// $(D_KEYWORD macro)
|
|
mixin_, /// $(D_KEYWORD mixin)
|
|
module_, /// $(D_KEYWORD module)
|
|
new_, /// $(D_KEYWORD new)
|
|
nothrow_, /// $(D_KEYWORD nothrow)
|
|
null_, /// $(D_KEYWORD null)
|
|
out_, /// $(D_KEYWORD out)
|
|
override_, /// $(D_KEYWORD override)
|
|
pure_, /// $(D_KEYWORD pure)
|
|
ref_, /// $(D_KEYWORD ref)
|
|
return_, /// $(D_KEYWORD return)
|
|
struct_, /// $(D_KEYWORD struct)
|
|
super_, /// $(D_KEYWORD super)
|
|
switch_, /// $(D_KEYWORD switch)
|
|
template_, /// $(D_KEYWORD template)
|
|
this_, /// $(D_KEYWORD this)
|
|
throw_, /// $(D_KEYWORD throw)
|
|
true_, /// $(D_KEYWORD true)
|
|
try_, /// $(D_KEYWORD try)
|
|
typedef_, /// $(D_KEYWORD typedef)
|
|
typeid_, /// $(D_KEYWORD typeid)
|
|
typeof_, /// $(D_KEYWORD typeof)
|
|
union_, /// $(D_KEYWORD union)
|
|
unittest_, /// $(D_KEYWORD unittest)
|
|
version_, /// $(D_KEYWORD version)
|
|
volatile_, /// $(D_KEYWORD volatile)
|
|
while_, /// $(D_KEYWORD while)
|
|
with_, /// $(D_KEYWORD with)
|
|
|
|
date, /// ___DATE__
|
|
eof, /// ___EOF__
|
|
time, /// ___TIME__
|
|
timestamp, /// ___TIMESTAMP__
|
|
vendor, /// ___VENDOR__
|
|
compilerVersion, /// ___VERSION__
|
|
file, /// $(D_KEYWORD ___FILE__)
|
|
line, /// $(D_KEYWORD ___LINE__)
|
|
comment, /// $(D_COMMENT /** comment */) or $(D_COMMENT // comment) or $(D_COMMENT ///comment)
|
|
identifier, /// anything else
|
|
scriptLine, /// Line at the beginning of a source file that starts with #!
|
|
traits, /// $(D_KEYWORD ___traits)
|
|
parameters, /// $(D_KEYWORD ___parameters)
|
|
vector, /// $(D_KEYWORD ___vector)
|
|
whitespace, /// whitespace
|
|
specialTokenSequence, /// #line 10 "file.d"
|
|
doubleLiteral, /// 123.456
|
|
floatLiteral, /// 123.456f or 0x123_45p-3
|
|
idoubleLiteral, /// 123.456i
|
|
ifloatLiteral, /// 123.456fi
|
|
intLiteral, /// 123 or 0b1101010101
|
|
longLiteral, /// 123L
|
|
realLiteral, /// 123.456L
|
|
irealLiteral, /// 123.456Li
|
|
uintLiteral, /// 123u
|
|
ulongLiteral, /// 123uL
|
|
dstringLiteral, /// $(D_STRING "32-bit character string"d)
|
|
stringLiteral, /// $(D_STRING "an 8-bit string")
|
|
wstringLiteral, /// $(D_STRING "16-bit character string"w)
|
|
}
|
|
|
|
// Implementation details follow
|
|
private:
|
|
|
|
/*
|
|
* To avoid memory allocations Token.value is set to a slice of this string
|
|
* for operators and keywords.
|
|
*/
|
|
immutable string opKwdValues =
|
|
"#/=*=+=++-=--^^=~=<<=%==>>>=||=&&=,;:!<=!<>=!=!>=?...()[]{}@$"
|
|
~ "boolcdoublecentcfloatcrealdchardstringfunctionidoubleifloatirealubyte"
|
|
~ "ucentuintulongushortvoidwcharwstringaligndeprecatedexternpragmaexport"
|
|
~ "packageprivateprotectedpublicabstractautoconstfinal__gsharedimmutable"
|
|
~ "inoutscopesharedstaticsynchronizedaliasasmassertbodybreakcasecastcatch"
|
|
~ "classcontinuedebugdefaultdelegatedeleteelseenumfalsefinally"
|
|
~ "foreach_reversegotoimportinterfaceinvariantlazymacromixinmodule"
|
|
~ "newnothrownulloverridepurerefreturnstructsuperswitchtemplatethistruetry"
|
|
~ "typedeftypeidtypeofunionunittestversionvolatilewhilewith__traits"
|
|
~ "__vector__parameters__DATE__EOF__TIME__TIMESTAMP__VENDOR__VERSION__"
|
|
~ "FILE__LINE__";
|
|
|
|
/*
|
|
* Slices of the above string to save memory. This array is automatically
|
|
* generated.
|
|
*/
|
|
immutable(string[]) tokenValues = [
|
|
opKwdValues[2 .. 3], // =
|
|
opKwdValues[59 .. 60], // @
|
|
opKwdValues[31 .. 32], // &
|
|
opKwdValues[32 .. 34], // &=
|
|
opKwdValues[28 .. 29], // |
|
|
opKwdValues[29 .. 31], // |=
|
|
opKwdValues[16 .. 18], // ~=
|
|
opKwdValues[36 .. 37], // :
|
|
opKwdValues[34 .. 35], // ,
|
|
opKwdValues[11 .. 13], // --
|
|
opKwdValues[1 .. 2], // /
|
|
opKwdValues[1 .. 3], // /=
|
|
opKwdValues[60 .. 61], // $
|
|
opKwdValues[50 .. 51], // .
|
|
opKwdValues[22 .. 24], // ==
|
|
opKwdValues[23 .. 25], // =>
|
|
opKwdValues[24 .. 25], // >
|
|
opKwdValues[26 .. 28], // >=
|
|
opKwdValues[0 .. 1], // #
|
|
opKwdValues[7 .. 9], // ++
|
|
opKwdValues[57 .. 58], // {
|
|
opKwdValues[55 .. 56], // [
|
|
opKwdValues[18 .. 19], // <
|
|
opKwdValues[19 .. 21], // <=
|
|
opKwdValues[41 .. 44], // <>=
|
|
opKwdValues[41 .. 43], // <>
|
|
opKwdValues[31 .. 33], // &&
|
|
opKwdValues[28 .. 30], // ||
|
|
opKwdValues[53 .. 54], // (
|
|
opKwdValues[9 .. 10], // -
|
|
opKwdValues[9 .. 11], // -=
|
|
opKwdValues[21 .. 22], // %
|
|
opKwdValues[21 .. 23], // %=
|
|
opKwdValues[3 .. 5], // *=
|
|
opKwdValues[37 .. 38], // !
|
|
opKwdValues[44 .. 46], // !=
|
|
opKwdValues[46 .. 48], // !>
|
|
opKwdValues[46 .. 49], // !>=
|
|
opKwdValues[37 .. 39], // !<
|
|
opKwdValues[37 .. 40], // !<=
|
|
opKwdValues[40 .. 43], // !<>
|
|
opKwdValues[5 .. 6], // +
|
|
opKwdValues[5 .. 7], // +=
|
|
opKwdValues[13 .. 15], // ^^
|
|
opKwdValues[13 .. 16], // ^^=
|
|
opKwdValues[58 .. 59], // }
|
|
opKwdValues[56 .. 57], // ]
|
|
opKwdValues[54 .. 55], // )
|
|
opKwdValues[35 .. 36], // ;
|
|
opKwdValues[18 .. 20], // <<
|
|
opKwdValues[18 .. 21], // <<=
|
|
opKwdValues[24 .. 26], // >>
|
|
opKwdValues[25 .. 28], // >>=
|
|
opKwdValues[50 .. 52], // ..
|
|
opKwdValues[3 .. 4], // *
|
|
opKwdValues[49 .. 50], // ?
|
|
opKwdValues[16 .. 17], // ~
|
|
opKwdValues[40 .. 44], // !<>=
|
|
opKwdValues[24 .. 27], // >>>
|
|
opKwdValues[24 .. 28], // >>>=
|
|
opKwdValues[50 .. 53], // ...
|
|
opKwdValues[13 .. 14], // ^
|
|
opKwdValues[14 .. 16], // ^=
|
|
opKwdValues[61 .. 65], // bool
|
|
opKwdValues[126 .. 130], // byte
|
|
opKwdValues[65 .. 72], // cdouble
|
|
opKwdValues[72 .. 76], // cent
|
|
opKwdValues[76 .. 82], // cfloat
|
|
opKwdValues[88 .. 92], // char
|
|
opKwdValues[82 .. 87], // creal
|
|
opKwdValues[87 .. 92], // dchar
|
|
opKwdValues[66 .. 72], // double
|
|
opKwdValues[92 .. 99], // dstring
|
|
opKwdValues[77 .. 82], // float
|
|
opKwdValues[99 .. 107], // function
|
|
opKwdValues[107 .. 114], // idouble
|
|
opKwdValues[114 .. 120], // ifloat
|
|
opKwdValues[136 .. 139], // int
|
|
opKwdValues[120 .. 125], // ireal
|
|
opKwdValues[140 .. 144], // long
|
|
opKwdValues[83 .. 87], // real
|
|
opKwdValues[145 .. 150], // short
|
|
opKwdValues[93 .. 99], // string
|
|
opKwdValues[125 .. 130], // ubyte
|
|
opKwdValues[130 .. 135], // ucent
|
|
opKwdValues[135 .. 139], // uint
|
|
opKwdValues[139 .. 144], // ulong
|
|
opKwdValues[144 .. 150], // ushort
|
|
opKwdValues[150 .. 154], // void
|
|
opKwdValues[154 .. 159], // wchar
|
|
opKwdValues[159 .. 166], // wstring
|
|
opKwdValues[166 .. 171], // align
|
|
opKwdValues[171 .. 181], // deprecated
|
|
opKwdValues[181 .. 187], // extern
|
|
opKwdValues[187 .. 193], // pragma
|
|
opKwdValues[193 .. 199], // export
|
|
opKwdValues[199 .. 206], // package
|
|
opKwdValues[206 .. 213], // private
|
|
opKwdValues[213 .. 222], // protected
|
|
opKwdValues[222 .. 228], // public
|
|
opKwdValues[228 .. 236], // abstract
|
|
opKwdValues[236 .. 240], // auto
|
|
opKwdValues[240 .. 245], // const
|
|
opKwdValues[245 .. 250], // final
|
|
opKwdValues[250 .. 259], // __gshared
|
|
opKwdValues[259 .. 268], // immutable
|
|
opKwdValues[268 .. 273], // inout
|
|
opKwdValues[273 .. 278], // scope
|
|
opKwdValues[253 .. 259], // shared
|
|
opKwdValues[284 .. 290], // static
|
|
opKwdValues[290 .. 302], // synchronized
|
|
opKwdValues[302 .. 307], // alias
|
|
opKwdValues[307 .. 310], // asm
|
|
opKwdValues[310 .. 316], // assert
|
|
opKwdValues[316 .. 320], // body
|
|
opKwdValues[320 .. 325], // break
|
|
opKwdValues[325 .. 329], // case
|
|
opKwdValues[329 .. 333], // cast
|
|
opKwdValues[333 .. 338], // catch
|
|
opKwdValues[338 .. 343], // class
|
|
opKwdValues[343 .. 351], // continue
|
|
opKwdValues[351 .. 356], // debug
|
|
opKwdValues[356 .. 363], // default
|
|
opKwdValues[363 .. 371], // delegate
|
|
opKwdValues[371 .. 377], // delete
|
|
opKwdValues[66 .. 68], // do
|
|
opKwdValues[377 .. 381], // else
|
|
opKwdValues[381 .. 385], // enum
|
|
opKwdValues[385 .. 390], // false
|
|
opKwdValues[390 .. 397], // finally
|
|
opKwdValues[397 .. 404], // foreach
|
|
opKwdValues[397 .. 412], // foreach_reverse
|
|
opKwdValues[397 .. 400], // for
|
|
opKwdValues[412 .. 416], // goto
|
|
opKwdValues[114 .. 116], // if
|
|
opKwdValues[416 .. 422], // import
|
|
opKwdValues[96 .. 98], // in
|
|
opKwdValues[422 .. 431], // interface
|
|
opKwdValues[431 .. 440], // invariant
|
|
opKwdValues[522 .. 524], // is
|
|
opKwdValues[440 .. 444], // lazy
|
|
opKwdValues[444 .. 449], // macro
|
|
opKwdValues[449 .. 454], // mixin
|
|
opKwdValues[454 .. 460], // module
|
|
opKwdValues[460 .. 463], // new
|
|
opKwdValues[463 .. 470], // nothrow
|
|
opKwdValues[470 .. 474], // null
|
|
opKwdValues[270 .. 273], // out
|
|
opKwdValues[474 .. 482], // override
|
|
opKwdValues[482 .. 486], // pure
|
|
opKwdValues[486 .. 489], // ref
|
|
opKwdValues[489 .. 495], // return
|
|
opKwdValues[495 .. 501], // struct
|
|
opKwdValues[501 .. 506], // super
|
|
opKwdValues[506 .. 512], // switch
|
|
opKwdValues[512 .. 520], // template
|
|
opKwdValues[520 .. 524], // this
|
|
opKwdValues[465 .. 470], // throw
|
|
opKwdValues[524 .. 528], // true
|
|
opKwdValues[528 .. 531], // try
|
|
opKwdValues[531 .. 538], // typedef
|
|
opKwdValues[538 .. 544], // typeid
|
|
opKwdValues[544 .. 550], // typeof
|
|
opKwdValues[550 .. 555], // union
|
|
opKwdValues[555 .. 563], // unittest
|
|
opKwdValues[563 .. 570], // version
|
|
opKwdValues[570 .. 578], // volatile
|
|
opKwdValues[578 .. 583], // while
|
|
opKwdValues[583 .. 587], // with
|
|
opKwdValues[615 .. 623], // __DATE__
|
|
opKwdValues[621 .. 628], // __EOF__
|
|
opKwdValues[626 .. 634], // __TIME__
|
|
opKwdValues[632 .. 645], // __TIMESTAMP__
|
|
opKwdValues[643 .. 653], // __VENDOR__
|
|
opKwdValues[651 .. 662], // __VERSION__
|
|
opKwdValues[660 .. 668], // __FILE__
|
|
opKwdValues[666 .. 674], // __LINE__
|
|
null,
|
|
null,
|
|
null,
|
|
opKwdValues[587 .. 595], // __traits
|
|
opKwdValues[603 .. 615], // __parameters
|
|
opKwdValues[595 .. 603], // __vector
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
];
|
|
|
|
pure string getTokenValue(const TokenType type)
|
|
{
|
|
return tokenValues[type];
|
|
}
|
|
|
|
private pure bool isNewline(R)(R range)
|
|
{
|
|
return range.front == '\n' || range.front == '\r';
|
|
}
|
|
|
|
pure bool isEoF(R)(R range)
|
|
{
|
|
return range.empty || range.front == 0 || range.front == 0x1a;
|
|
}
|
|
|
|
ubyte[] popDigitChars(R, alias isInterestingDigit)(ref R input, ref uint index,
|
|
uint upTo) if (isForwardRange!R)
|
|
{
|
|
ubyte[] chars;
|
|
chars.reserve(upTo);
|
|
for (uint i = 0; i != upTo; ++i)
|
|
{
|
|
if (isInterestingDigit(input.front))
|
|
{
|
|
chars ~= input.front;
|
|
input.popFront();
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
return chars;
|
|
}
|
|
|
|
ubyte[] popHexChars(R)(ref R input, ref uint index, uint upTo)
|
|
{
|
|
return popDigitChars!(R, isHexDigit)(input, index, upTo);
|
|
}
|
|
|
|
ubyte[] popOctalChars(R)(ref R input, ref uint index, uint upTo)
|
|
{
|
|
return popDigitChars!(R, isOctalDigit)(input, index, upTo);
|
|
}
|
|
|
|
void interpretEscapeSequence(R)(ref R input, ref uint index, ref ubyte[] buffer,
|
|
ref size_t i) if (isForwardRange!R)
|
|
in
|
|
{
|
|
assert(input.front == '\\');
|
|
}
|
|
body
|
|
{
|
|
input.popFront();
|
|
short h = 0;
|
|
switch (input.front)
|
|
{
|
|
case '\'':
|
|
case '\"':
|
|
case '?':
|
|
case '\\':
|
|
case 0:
|
|
case 0x1a:
|
|
auto f = input.front;
|
|
input.popFront();
|
|
++index;
|
|
auto s = to!string(cast(char) f);
|
|
buffer[i .. i + s.length] = cast(ubyte[]) s;
i += s.length;
|
|
return;
|
|
case 'a': input.popFront(); ++index; buffer[i++] = '\a'; return;
|
|
case 'b': input.popFront(); ++index; buffer[i++] = '\b'; return;
|
|
case 'f': input.popFront(); ++index; buffer[i++] = '\f'; return;
|
|
case 'n': input.popFront(); ++index; buffer[i++] = '\n'; return;
|
|
case 'r': input.popFront(); ++index; buffer[i++] = '\r'; return;
|
|
case 't': input.popFront(); ++index; buffer[i++] = '\t'; return;
|
|
case 'v': input.popFront(); ++index; buffer[i++] = '\v'; return;
|
|
case 'x': h = 2; goto hex;
|
|
case 'u': h = 4; goto hex;
|
|
case 'U': h = 8; goto hex;
|
|
case '0': .. case '7':
|
|
auto octalChars = cast(char[]) popOctalChars(input, index, 3);
|
|
char[4] b;
|
|
auto n = encode(b, cast(dchar) parse!uint(octalChars, 8));
|
|
buffer[i .. i + n] = cast(ubyte[]) b[0 .. n];
|
|
i += n;
|
|
return;
|
|
case '&':
|
|
input.popFront();
|
|
++index;
|
|
auto entity = appender!(ubyte[])();
|
|
while (!input.isEoF() && input.front != ';')
|
|
{
|
|
entity.put(input.front);
|
|
input.popFront();
|
|
++index;
|
|
}
|
|
if (!isEoF(input))
|
|
{
|
|
auto decoded = to!string(cast(char[]) entity.data) in characterEntities;
|
|
input.popFront();
|
|
++index;
|
|
if (decoded !is null)
|
|
{
|
|
buffer[i .. i + decoded.length] = cast(ubyte[]) *decoded;
|
|
i += decoded.length;
|
|
}
|
|
}
|
|
return;
|
|
default:
|
|
input.popFront();
|
|
++index;
|
|
// This is an error
|
|
buffer[i++] = '\\';
|
|
return;
|
|
}
|
|
|
|
hex:
|
|
input.popFront();
|
|
auto hexChars = cast(char[]) popHexChars(input, index, h);
|
|
char[4] b;
|
|
auto n = encode(b, cast(dchar) parse!uint(hexChars, 16));
|
|
buffer[i .. i + n] = cast(ubyte[]) b[0 .. n];
|
|
i += n;
|
|
return;
|
|
}
|
|
|
|
pure nothrow bool isSeparating(ubyte ch)
|
|
{
|
|
return (ch >= '!' && ch <= '/')
|
|
|| (ch >= ':' && ch <= '@')
|
|
|| (ch >= '[' && ch <= '^')
|
|
|| (ch >= '{' && ch <= '~')
|
|
|| ch == '`'
|
|
|| ch == 0x20
|
|
|| ch == 0x09
|
|
|| ch == 0x0a;
|
|
}
|
|
|
|
pure nothrow TokenType lookupTokenType(const const(char)[] input)
|
|
{
|
|
switch(input.length)
|
|
{
|
|
case 2:
|
|
switch (input)
|
|
{
|
|
case "do": return TokenType.do_;
|
|
case "if": return TokenType.if_;
|
|
case "in": return TokenType.in_;
|
|
case "is": return TokenType.is_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 3:
|
|
switch (input)
|
|
{
|
|
case "asm": return TokenType.asm_;
|
|
case "for": return TokenType.for_;
|
|
case "int": return TokenType.int_;
|
|
case "new": return TokenType.new_;
|
|
case "out": return TokenType.out_;
|
|
case "ref": return TokenType.ref_;
|
|
case "try": return TokenType.try_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 4:
|
|
switch (input)
|
|
{
|
|
case "auto": return TokenType.auto_;
|
|
case "body": return TokenType.body_;
|
|
case "bool": return TokenType.bool_;
|
|
case "byte": return TokenType.byte_;
|
|
case "case": return TokenType.case_;
|
|
case "cast": return TokenType.cast_;
|
|
case "cent": return TokenType.cent_;
|
|
case "char": return TokenType.char_;
|
|
case "else": return TokenType.else_;
|
|
case "enum": return TokenType.enum_;
|
|
case "goto": return TokenType.goto_;
|
|
case "lazy": return TokenType.lazy_;
|
|
case "long": return TokenType.long_;
|
|
case "null": return TokenType.null_;
|
|
case "pure": return TokenType.pure_;
|
|
case "real": return TokenType.real_;
|
|
case "this": return TokenType.this_;
|
|
case "true": return TokenType.true_;
|
|
case "uint": return TokenType.uint_;
|
|
case "void": return TokenType.void_;
|
|
case "with": return TokenType.with_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 5:
|
|
switch (input)
|
|
{
|
|
case "alias": return TokenType.alias_;
|
|
case "align": return TokenType.align_;
|
|
case "break": return TokenType.break_;
|
|
case "catch": return TokenType.catch_;
|
|
case "class": return TokenType.class_;
|
|
case "const": return TokenType.const_;
|
|
case "creal": return TokenType.creal_;
|
|
case "dchar": return TokenType.dchar_;
|
|
case "debug": return TokenType.debug_;
|
|
case "false": return TokenType.false_;
|
|
case "final": return TokenType.final_;
|
|
case "float": return TokenType.float_;
|
|
case "inout": return TokenType.inout_;
|
|
case "ireal": return TokenType.ireal_;
|
|
case "macro": return TokenType.macro_;
|
|
case "mixin": return TokenType.mixin_;
|
|
case "scope": return TokenType.scope_;
|
|
case "short": return TokenType.short_;
|
|
case "super": return TokenType.super_;
|
|
case "throw": return TokenType.throw_;
|
|
case "ubyte": return TokenType.ubyte_;
|
|
case "ucent": return TokenType.ucent_;
|
|
case "ulong": return TokenType.ulong_;
|
|
case "union": return TokenType.union_;
|
|
case "wchar": return TokenType.wchar_;
|
|
case "while": return TokenType.while_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 6:
|
|
switch (input)
|
|
{
|
|
case "assert": return TokenType.assert_;
|
|
case "cfloat": return TokenType.cfloat_;
|
|
case "delete": return TokenType.delete_;
|
|
case "double": return TokenType.double_;
|
|
case "export": return TokenType.export_;
|
|
case "extern": return TokenType.extern_;
|
|
case "ifloat": return TokenType.ifloat_;
|
|
case "import": return TokenType.import_;
|
|
case "module": return TokenType.module_;
|
|
case "pragma": return TokenType.pragma_;
|
|
case "public": return TokenType.public_;
|
|
case "return": return TokenType.return_;
|
|
case "shared": return TokenType.shared_;
|
|
case "static": return TokenType.static_;
|
|
case "string": return TokenType.string_;
|
|
case "struct": return TokenType.struct_;
|
|
case "switch": return TokenType.switch_;
|
|
case "typeid": return TokenType.typeid_;
|
|
case "typeof": return TokenType.typeof_;
|
|
case "ushort": return TokenType.ushort_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 7:
|
|
switch (input)
|
|
{
|
|
case "__EOF__": return TokenType.eof;
|
|
case "cdouble": return TokenType.cdouble_;
|
|
case "default": return TokenType.default_;
|
|
case "dstring": return TokenType.dstring_;
|
|
case "finally": return TokenType.finally_;
|
|
case "foreach": return TokenType.foreach_;
|
|
case "idouble": return TokenType.idouble_;
|
|
case "nothrow": return TokenType.nothrow_;
|
|
case "package": return TokenType.package_;
|
|
case "private": return TokenType.private_;
|
|
case "typedef": return TokenType.typedef_;
|
|
case "version": return TokenType.version_;
|
|
case "wstring": return TokenType.wstring_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 8:
|
|
switch (input)
|
|
{
|
|
case "override": return TokenType.override_;
|
|
case "continue": return TokenType.continue_;
|
|
case "__LINE__": return TokenType.line;
|
|
case "template": return TokenType.template_;
|
|
case "abstract": return TokenType.abstract_;
|
|
case "__traits": return TokenType.traits;
|
|
case "volatile": return TokenType.volatile_;
|
|
case "delegate": return TokenType.delegate_;
|
|
case "function": return TokenType.function_;
|
|
case "unittest": return TokenType.unittest_;
|
|
case "__FILE__": return TokenType.file;
|
|
case "__DATE__": return TokenType.date;
|
|
case "__TIME__": return TokenType.time;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 9:
|
|
switch (input)
|
|
{
|
|
case "__gshared": return TokenType.gshared;
|
|
case "immutable": return TokenType.immutable_;
|
|
case "interface": return TokenType.interface_;
|
|
case "invariant": return TokenType.invariant_;
|
|
case "protected": return TokenType.protected_;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 10:
|
|
switch (input)
|
|
{
|
|
case "deprecated": return TokenType.deprecated_;
|
|
case "__VENDOR__": return TokenType.vendor;
|
|
default: break;
|
|
}
|
|
break;
|
|
case 11:
|
|
if (input == "__VERSION__")
|
|
return TokenType.compilerVersion;
|
|
break;
|
|
case 12:
|
|
if (input == "synchronized")
|
|
return TokenType.synchronized_;
|
|
break;
|
|
case 13:
|
|
if (input == "__TIMESTAMP__")
|
|
return TokenType.timestamp;
|
|
break;
|
|
case 15:
|
|
if (input == "foreach_reverse")
|
|
return TokenType.foreach_reverse_;
|
|
break;
|
|
default: break;
|
|
}
|
|
return TokenType.identifier;
|
|
}
|
|
|
|
class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
|
|
{
|
|
/**
|
|
* Adds the given value to the trie with the given key
|
|
*/
|
|
void add(K key, V value) pure
|
|
{
|
|
TrieNode!(K,V) current = this;
|
|
foreach(keyPart; key)
|
|
{
|
|
if ((keyPart in current.children) is null)
|
|
{
|
|
auto node = new TrieNode!(K, V);
|
|
current.children[keyPart] = node;
|
|
current = node;
|
|
}
|
|
else
|
|
current = current.children[keyPart];
|
|
}
|
|
current.value = value;
|
|
}
|
|
}
|
|
|
|
class TrieNode(K, V) if (isInputRange!K)
|
|
{
|
|
V value;
|
|
TrieNode!(K,V)[ElementType!K] children;
|
|
}
|
|
|
|
string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
|
|
{
|
|
string caseStatement = "";
|
|
foreach(dchar k, TrieNode!(K,V) v; node.children)
|
|
{
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "case '";
|
|
caseStatement ~= k;
|
|
caseStatement ~= "':\n";
|
|
if (indentString == "")
|
|
{
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tsize_t i = 0;\n";
|
|
}
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tbuffer[i++] = '";
|
|
caseStatement ~= k;
|
|
caseStatement ~= "';\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t++index;\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\trange.popFront();\n";
|
|
if (v.children.length > 0)
|
|
{
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tif (range.isEoF())\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t{\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value;
|
|
caseStatement ~= ";\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t\tbreak;\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t}\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tswitch (range.front)\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t{\n";
|
|
caseStatement ~= printCaseStatements(v, indentString ~ "\t");
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tdefault:\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t\tcurrent.type = ";
|
|
caseStatement ~= v.value;
|
|
caseStatement ~= ";\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t\tbreak;\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\t}\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tbreak;\n";
|
|
}
|
|
else
|
|
{
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tcurrent.type = ";
|
|
caseStatement ~= v.value;
|
|
caseStatement ~= ";\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
|
|
caseStatement ~= indentString;
|
|
caseStatement ~= "\tbreak;\n";
|
|
}
|
|
}
|
|
return caseStatement;
|
|
}
|
|
|
|
string generateCaseTrie(string[] args ...)
|
|
{
|
|
auto t = new Trie!(string, string);
|
|
for(int i = 0; i < args.length; i+=2)
|
|
{
|
|
t.add(args[i], args[i+1]);
|
|
}
|
|
return printCaseStatements(t, "");
|
|
}
|
|
|
|
//void main(string[] args) {}
|