Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

This commit is contained in:
Hackerpilot 2013-02-27 21:01:49 +00:00
commit 1dfeb281d9
1 changed files with 92 additions and 94 deletions

View File

@ -683,30 +683,7 @@ struct TokenRange(LexSrc)
*/
void popFront()
{
// Filter out tokens we don't care about
loop: while (true)
{
advance();
if(empty)
break loop;
switch (current.type)
{
case TokenType.whitespace:
if (config.iterStyle & IterationStyle.includeWhitespace)
break loop;
break;
case TokenType.comment:
if (config.iterStyle & IterationStyle.includeComments)
break loop;
break;
case TokenType.specialTokenSequence:
if (config.iterStyle & IterationStyle.includeSpecialTokens)
break loop;
break;
default:
break loop;
}
}
advance();
}
private:
@ -716,29 +693,26 @@ private:
*/
void advance()
{
if (isEoF())
{
_empty = true;
return;
}
src.mark(); // mark a start of a lexing "frame"
L_advance:
if (src.empty)
{
_empty = true;
return;
}
src.mark(); // mark a start of a lexing "frame"
current.line = lineNumber;
current.startIndex = src.index;
current.column = column;
current.value = null;
if (isWhite())
{
if (config.iterStyle & IterationStyle.includeWhitespace)
lexWhitespace!true();
else
lexWhitespace!false();
return;
}
current.value = null;
switch (src.front)
{
// handle sentenels for end of input
case 0:
case 0x1a:
// TODO: check config flags, it's cheap
// since this branch at most is taken once per file
_empty = true;
return;
// pragma(msg, generateCaseTrie(
mixin(generateCaseTrie(
"=", "TokenType.assign",
@ -813,10 +787,10 @@ private:
case '*':
case '+':
if (config.iterStyle & IterationStyle.includeComments)
lexComment!true();
else
lexComment!false();
return;
return lexComment!true();
lexComment!false();
goto L_advance; // tail-recursion
case '=':
current.type = TokenType.divEqual;
current.value = "/=";
@ -904,13 +878,33 @@ private:
}
else
goto default;
case '#':
lexSpecialTokenSequence();
return;
default:
while(!isEoF() && !isSeparating())
case '#':
lexSpecialTokenSequence();
if(config.iterStyle & IterationStyle.includeSpecialTokens)
return;
goto L_advance; // tail-recursion
// "short" ASCII whites
case 0x20:
case 0x09: .. case 0x0d:
if (config.iterStyle & IterationStyle.includeWhitespace)
return lexWhitespace!true();
lexWhitespace!false();
goto L_advance; // tail-recursion
default:
if ((src.front & 0x80) && isLongWhite())
{
if (config.iterStyle & IterationStyle.includeWhitespace)
return lexWhitespace!true();
lexWhitespace!false();
goto L_advance; // tail-recursion
}
for(;;)
{
if(isSeparating())
break;
nextCharNonLF();
if(isEoF())
break;
}
current.type = lookupTokenType(src.slice);
@ -924,48 +918,9 @@ private:
return;
}
if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial))
if (config.iterStyle & TokenStyle.doNotReplaceSpecial)
return;
switch (current.type)
{
case TokenType.date:
current.type = TokenType.stringLiteral;
auto time = Clock.currTime();
current.value = format("%s %02d %04d", time.month, time.day, time.year);
return;
case TokenType.time:
auto time = Clock.currTime();
current.type = TokenType.stringLiteral;
current.value = (cast(TimeOfDay)(time)).toISOExtString();
return;
case TokenType.timestamp:
auto time = Clock.currTime();
auto dt = cast(DateTime) time;
current.type = TokenType.stringLiteral;
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
dt.second, dt.year);
return;
case TokenType.vendor:
current.type = TokenType.stringLiteral;
current.value = config.vendorString;
return;
case TokenType.compilerVersion:
current.type = TokenType.stringLiteral;
current.value = format("%d", config.versionNumber);
return;
case TokenType.line:
current.type = TokenType.intLiteral;
current.value = format("%d", current.line);
return;
case TokenType.file:
current.type = TokenType.stringLiteral;
current.value = config.fileName;
return;
default:
return;
}
expandSpecialToken();
}
}
@ -1552,7 +1507,7 @@ private:
import std.stdio;
if(unescaped != Appender!(ubyte[]).init)
{
//stuff in the last slice and used buffered data
//stuff in the last slice and use buffered data
unescaped.put(src.slice);
setData(unescaped.data);
}
@ -2132,9 +2087,52 @@ private:
else
r.popFront();
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
return false;
return false;
return true;
}
void expandSpecialToken()
{
switch (current.type)
{
case TokenType.date:
current.type = TokenType.stringLiteral;
auto time = Clock.currTime();
current.value = format("%s %02d %04d", time.month, time.day, time.year);
return;
case TokenType.time:
auto time = Clock.currTime();
current.type = TokenType.stringLiteral;
current.value = (cast(TimeOfDay)(time)).toISOExtString();
return;
case TokenType.timestamp:
auto time = Clock.currTime();
auto dt = cast(DateTime) time;
current.type = TokenType.stringLiteral;
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
dt.second, dt.year);
return;
case TokenType.vendor:
current.type = TokenType.stringLiteral;
current.value = config.vendorString;
return;
case TokenType.compilerVersion:
current.type = TokenType.stringLiteral;
current.value = format("%d", config.versionNumber);
return;
case TokenType.line:
current.type = TokenType.intLiteral;
current.value = format("%d", current.line);
return;
case TokenType.file:
current.type = TokenType.stringLiteral;
current.value = config.fileName;
return;
default:
return;
}
}
void errorMessage(string s)
{