Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer
This commit is contained in:
commit
1dfeb281d9
180
std/d/lexer.d
180
std/d/lexer.d
|
@ -683,30 +683,7 @@ struct TokenRange(LexSrc)
|
|||
*/
|
||||
void popFront()
|
||||
{
|
||||
// Filter out tokens we don't care about
|
||||
loop: while (true)
|
||||
{
|
||||
advance();
|
||||
if(empty)
|
||||
break loop;
|
||||
switch (current.type)
|
||||
{
|
||||
case TokenType.whitespace:
|
||||
if (config.iterStyle & IterationStyle.includeWhitespace)
|
||||
break loop;
|
||||
break;
|
||||
case TokenType.comment:
|
||||
if (config.iterStyle & IterationStyle.includeComments)
|
||||
break loop;
|
||||
break;
|
||||
case TokenType.specialTokenSequence:
|
||||
if (config.iterStyle & IterationStyle.includeSpecialTokens)
|
||||
break loop;
|
||||
break;
|
||||
default:
|
||||
break loop;
|
||||
}
|
||||
}
|
||||
advance();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -716,29 +693,26 @@ private:
|
|||
*/
|
||||
void advance()
|
||||
{
|
||||
if (isEoF())
|
||||
{
|
||||
_empty = true;
|
||||
return;
|
||||
}
|
||||
|
||||
src.mark(); // mark a start of a lexing "frame"
|
||||
L_advance:
|
||||
if (src.empty)
|
||||
{
|
||||
_empty = true;
|
||||
return;
|
||||
}
|
||||
src.mark(); // mark a start of a lexing "frame"
|
||||
current.line = lineNumber;
|
||||
current.startIndex = src.index;
|
||||
current.column = column;
|
||||
current.value = null;
|
||||
|
||||
if (isWhite())
|
||||
{
|
||||
if (config.iterStyle & IterationStyle.includeWhitespace)
|
||||
lexWhitespace!true();
|
||||
else
|
||||
lexWhitespace!false();
|
||||
return;
|
||||
}
|
||||
|
||||
switch (src.front)
|
||||
{
|
||||
// handle sentenels for end of input
|
||||
case 0:
|
||||
case 0x1a:
|
||||
// TODO: check config flags, it's cheap
|
||||
// since this branch at most is taken once per file
|
||||
_empty = true;
|
||||
return;
|
||||
// pragma(msg, generateCaseTrie(
|
||||
mixin(generateCaseTrie(
|
||||
"=", "TokenType.assign",
|
||||
|
@ -813,10 +787,10 @@ private:
|
|||
case '*':
|
||||
case '+':
|
||||
if (config.iterStyle & IterationStyle.includeComments)
|
||||
lexComment!true();
|
||||
else
|
||||
lexComment!false();
|
||||
return;
|
||||
return lexComment!true();
|
||||
lexComment!false();
|
||||
goto L_advance; // tail-recursion
|
||||
|
||||
case '=':
|
||||
current.type = TokenType.divEqual;
|
||||
current.value = "/=";
|
||||
|
@ -905,12 +879,32 @@ private:
|
|||
else
|
||||
goto default;
|
||||
case '#':
|
||||
lexSpecialTokenSequence();
|
||||
return;
|
||||
lexSpecialTokenSequence();
|
||||
if(config.iterStyle & IterationStyle.includeSpecialTokens)
|
||||
return;
|
||||
goto L_advance; // tail-recursion
|
||||
// "short" ASCII whites
|
||||
case 0x20:
|
||||
case 0x09: .. case 0x0d:
|
||||
if (config.iterStyle & IterationStyle.includeWhitespace)
|
||||
return lexWhitespace!true();
|
||||
lexWhitespace!false();
|
||||
goto L_advance; // tail-recursion
|
||||
default:
|
||||
while(!isEoF() && !isSeparating())
|
||||
if ((src.front & 0x80) && isLongWhite())
|
||||
{
|
||||
if (config.iterStyle & IterationStyle.includeWhitespace)
|
||||
return lexWhitespace!true();
|
||||
lexWhitespace!false();
|
||||
goto L_advance; // tail-recursion
|
||||
}
|
||||
for(;;)
|
||||
{
|
||||
if(isSeparating())
|
||||
break;
|
||||
nextCharNonLF();
|
||||
if(isEoF())
|
||||
break;
|
||||
}
|
||||
|
||||
current.type = lookupTokenType(src.slice);
|
||||
|
@ -924,48 +918,9 @@ private:
|
|||
return;
|
||||
}
|
||||
|
||||
if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial))
|
||||
if (config.iterStyle & TokenStyle.doNotReplaceSpecial)
|
||||
return;
|
||||
|
||||
switch (current.type)
|
||||
{
|
||||
case TokenType.date:
|
||||
current.type = TokenType.stringLiteral;
|
||||
auto time = Clock.currTime();
|
||||
current.value = format("%s %02d %04d", time.month, time.day, time.year);
|
||||
return;
|
||||
case TokenType.time:
|
||||
auto time = Clock.currTime();
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = (cast(TimeOfDay)(time)).toISOExtString();
|
||||
return;
|
||||
case TokenType.timestamp:
|
||||
auto time = Clock.currTime();
|
||||
auto dt = cast(DateTime) time;
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
|
||||
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
|
||||
dt.second, dt.year);
|
||||
return;
|
||||
case TokenType.vendor:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = config.vendorString;
|
||||
return;
|
||||
case TokenType.compilerVersion:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = format("%d", config.versionNumber);
|
||||
return;
|
||||
case TokenType.line:
|
||||
current.type = TokenType.intLiteral;
|
||||
current.value = format("%d", current.line);
|
||||
return;
|
||||
case TokenType.file:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = config.fileName;
|
||||
return;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
expandSpecialToken();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1552,7 +1507,7 @@ private:
|
|||
import std.stdio;
|
||||
if(unescaped != Appender!(ubyte[]).init)
|
||||
{
|
||||
//stuff in the last slice and used buffered data
|
||||
//stuff in the last slice and use buffered data
|
||||
unescaped.put(src.slice);
|
||||
setData(unescaped.data);
|
||||
}
|
||||
|
@ -2132,10 +2087,53 @@ private:
|
|||
else
|
||||
r.popFront();
|
||||
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
|
||||
return false;
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void expandSpecialToken()
|
||||
{
|
||||
switch (current.type)
|
||||
{
|
||||
case TokenType.date:
|
||||
current.type = TokenType.stringLiteral;
|
||||
auto time = Clock.currTime();
|
||||
current.value = format("%s %02d %04d", time.month, time.day, time.year);
|
||||
return;
|
||||
case TokenType.time:
|
||||
auto time = Clock.currTime();
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = (cast(TimeOfDay)(time)).toISOExtString();
|
||||
return;
|
||||
case TokenType.timestamp:
|
||||
auto time = Clock.currTime();
|
||||
auto dt = cast(DateTime) time;
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = format("%s %s %02d %02d:%02d:%02d %04d",
|
||||
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
|
||||
dt.second, dt.year);
|
||||
return;
|
||||
case TokenType.vendor:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = config.vendorString;
|
||||
return;
|
||||
case TokenType.compilerVersion:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = format("%d", config.versionNumber);
|
||||
return;
|
||||
case TokenType.line:
|
||||
current.type = TokenType.intLiteral;
|
||||
current.value = format("%d", current.line);
|
||||
return;
|
||||
case TokenType.file:
|
||||
current.type = TokenType.stringLiteral;
|
||||
current.value = config.fileName;
|
||||
return;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void errorMessage(string s)
|
||||
{
|
||||
import std.string: format;
|
||||
|
|
Loading…
Reference in New Issue