Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

This commit is contained in:
Hackerpilot 2013-02-27 21:01:49 +00:00
commit 1dfeb281d9
1 changed files with 92 additions and 94 deletions
std/d

View File

@ -683,30 +683,7 @@ struct TokenRange(LexSrc)
*/ */
void popFront() void popFront()
{ {
// Filter out tokens we don't care about advance();
loop: while (true)
{
advance();
if(empty)
break loop;
switch (current.type)
{
case TokenType.whitespace:
if (config.iterStyle & IterationStyle.includeWhitespace)
break loop;
break;
case TokenType.comment:
if (config.iterStyle & IterationStyle.includeComments)
break loop;
break;
case TokenType.specialTokenSequence:
if (config.iterStyle & IterationStyle.includeSpecialTokens)
break loop;
break;
default:
break loop;
}
}
} }
private: private:
@ -716,29 +693,26 @@ private:
*/ */
void advance() void advance()
{ {
if (isEoF()) L_advance:
{ if (src.empty)
_empty = true; {
return; _empty = true;
} return;
}
src.mark(); // mark a start of a lexing "frame" src.mark(); // mark a start of a lexing "frame"
current.line = lineNumber; current.line = lineNumber;
current.startIndex = src.index; current.startIndex = src.index;
current.column = column; current.column = column;
current.value = null; current.value = null;
if (isWhite())
{
if (config.iterStyle & IterationStyle.includeWhitespace)
lexWhitespace!true();
else
lexWhitespace!false();
return;
}
switch (src.front) switch (src.front)
{ {
// handle sentinels for end of input
case 0:
case 0x1a:
// TODO: check config flags, it's cheap
// since this branch at most is taken once per file
_empty = true;
return;
// pragma(msg, generateCaseTrie( // pragma(msg, generateCaseTrie(
mixin(generateCaseTrie( mixin(generateCaseTrie(
"=", "TokenType.assign", "=", "TokenType.assign",
@ -813,10 +787,10 @@ private:
case '*': case '*':
case '+': case '+':
if (config.iterStyle & IterationStyle.includeComments) if (config.iterStyle & IterationStyle.includeComments)
lexComment!true(); return lexComment!true();
else lexComment!false();
lexComment!false(); goto L_advance; // tail-recursion
return;
case '=': case '=':
current.type = TokenType.divEqual; current.type = TokenType.divEqual;
current.value = "/="; current.value = "/=";
@ -905,12 +879,32 @@ private:
else else
goto default; goto default;
case '#': case '#':
lexSpecialTokenSequence(); lexSpecialTokenSequence();
return; if(config.iterStyle & IterationStyle.includeSpecialTokens)
return;
goto L_advance; // tail-recursion
// "short" ASCII whites
case 0x20:
case 0x09: .. case 0x0d:
if (config.iterStyle & IterationStyle.includeWhitespace)
return lexWhitespace!true();
lexWhitespace!false();
goto L_advance; // tail-recursion
default: default:
while(!isEoF() && !isSeparating()) if ((src.front & 0x80) && isLongWhite())
{
if (config.iterStyle & IterationStyle.includeWhitespace)
return lexWhitespace!true();
lexWhitespace!false();
goto L_advance; // tail-recursion
}
for(;;)
{ {
if(isSeparating())
break;
nextCharNonLF(); nextCharNonLF();
if(isEoF())
break;
} }
current.type = lookupTokenType(src.slice); current.type = lookupTokenType(src.slice);
@ -924,48 +918,9 @@ private:
return; return;
} }
if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial)) if (config.iterStyle & TokenStyle.doNotReplaceSpecial)
return; return;
expandSpecialToken();
switch (current.type)
{
case TokenType.date:
current.type = TokenType.stringLiteral;
auto time = Clock.currTime();
current.value = format("%s %02d %04d", time.month, time.day, time.year);
return;
case TokenType.time:
auto time = Clock.currTime();
current.type = TokenType.stringLiteral;
current.value = (cast(TimeOfDay)(time)).toISOExtString();
return;
case TokenType.timestamp:
auto time = Clock.currTime();
auto dt = cast(DateTime) time;
current.type = TokenType.stringLiteral;
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
dt.second, dt.year);
return;
case TokenType.vendor:
current.type = TokenType.stringLiteral;
current.value = config.vendorString;
return;
case TokenType.compilerVersion:
current.type = TokenType.stringLiteral;
current.value = format("%d", config.versionNumber);
return;
case TokenType.line:
current.type = TokenType.intLiteral;
current.value = format("%d", current.line);
return;
case TokenType.file:
current.type = TokenType.stringLiteral;
current.value = config.fileName;
return;
default:
return;
}
} }
} }
@ -1552,7 +1507,7 @@ private:
import std.stdio; import std.stdio;
if(unescaped != Appender!(ubyte[]).init) if(unescaped != Appender!(ubyte[]).init)
{ {
//stuff in the last slice and used buffered data //stuff in the last slice and use buffered data
unescaped.put(src.slice); unescaped.put(src.slice);
setData(unescaped.data); setData(unescaped.data);
} }
@ -2132,10 +2087,53 @@ private:
else else
r.popFront(); r.popFront();
if (r.empty || (r.front != 0xa8 && r.front != 0xa9)) if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
return false; return false;
return true; return true;
} }
/**
 * Rewrites the current token in place when its type is one of the
 * "special" token types handled below (date, time, timestamp, vendor,
 * compilerVersion, line, file): the type becomes a literal type and the
 * value becomes the expanded text. Tokens of any other type are left
 * untouched.
 *
 * NOTE(review): these types presumably correspond to D's __DATE__,
 * __TIME__, __TIMESTAMP__, __VENDOR__, __VERSION__, __LINE__ and
 * __FILE__ keywords; confirm against lookupTokenType.
 */
void expandSpecialToken()
{
    switch (current.type)
    {
    case TokenType.date:
        // "<month> <day> <year>" taken from the wall clock at lex time
        current.type = TokenType.stringLiteral;
        auto now = Clock.currTime();
        current.value = format("%s %02d %04d", now.month, now.day, now.year);
        break;

    case TokenType.time:
        // time-of-day part of the current clock reading, ISO extended form
        auto now = Clock.currTime();
        current.type = TokenType.stringLiteral;
        current.value = (cast(TimeOfDay)(now)).toISOExtString();
        break;

    case TokenType.timestamp:
        // "<weekday> <month> <day> <hh>:<mm>:<ss> <year>"
        auto now = Clock.currTime();
        auto stamp = cast(DateTime) now;
        current.type = TokenType.stringLiteral;
        current.value = format("%s %s %02d %02d:%02d:%02d %04d",
            stamp.dayOfWeek, stamp.month, stamp.day,
            stamp.hour, stamp.minute, stamp.second,
            stamp.year);
        break;

    case TokenType.vendor:
        // vendor string comes straight from the lexer configuration
        current.type = TokenType.stringLiteral;
        current.value = config.vendorString;
        break;

    case TokenType.compilerVersion:
        // configured version number rendered as decimal text
        current.type = TokenType.stringLiteral;
        current.value = format("%d", config.versionNumber);
        break;

    case TokenType.line:
        // the only expansion that yields an integer literal
        current.type = TokenType.intLiteral;
        current.value = format("%d", current.line);
        break;

    case TokenType.file:
        // file name comes straight from the lexer configuration
        current.type = TokenType.stringLiteral;
        current.value = config.fileName;
        break;

    default:
        // not a special token: nothing to expand
        break;
    }
}
void errorMessage(string s) void errorMessage(string s)
{ {
import std.string: format; import std.string: format;