Tokenizer is somewhat functional
This commit is contained in:
parent
e3c737f6e1
commit
a7f81c57cc
|
@ -53,15 +53,13 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
|
||||||
caseStatement ~= k;
|
caseStatement ~= k;
|
||||||
caseStatement ~= "';\n";
|
caseStatement ~= "';\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\tcurrent.lineNumber = lineNumber;\n";
|
|
||||||
caseStatement ~= indentString;
|
|
||||||
caseStatement ~= "\t++index;\n";
|
caseStatement ~= "\t++index;\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\tinput.popFront();\n";
|
caseStatement ~= "\trange.popFront();\n";
|
||||||
if (v.children.length > 0)
|
if (v.children.length > 0)
|
||||||
{
|
{
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\tif (isEoF(inputString, endIndex))\n";
|
caseStatement ~= "\tif (range.isEoF())\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\t{\n";
|
caseStatement ~= "\t{\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
|
@ -72,7 +70,7 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\t}\n";
|
caseStatement ~= "\t}\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\tswitch (input.front)\n";
|
caseStatement ~= "\tswitch (range.front)\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\t{\n";
|
caseStatement ~= "\t{\n";
|
||||||
caseStatement ~= printCaseStatements(v, indentString ~ "\t");
|
caseStatement ~= printCaseStatements(v, indentString ~ "\t");
|
||||||
|
|
19
langutils.d
19
langutils.d
|
@ -110,7 +110,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
|
||||||
case 5:
|
case 5:
|
||||||
switch (input)
|
switch (input)
|
||||||
{
|
{
|
||||||
case "@safe": return TokenType.AtSafe;
|
|
||||||
case "alias": return TokenType.Alias;
|
case "alias": return TokenType.Alias;
|
||||||
case "align": return TokenType.Align;
|
case "align": return TokenType.Align;
|
||||||
case "break": return TokenType.Break;
|
case "break": return TokenType.Break;
|
||||||
|
@ -169,7 +168,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
|
||||||
case 7:
|
case 7:
|
||||||
switch (input)
|
switch (input)
|
||||||
{
|
{
|
||||||
case "@system": return TokenType.AtSystem;
|
|
||||||
case "cdouble": return TokenType.Cdouble;
|
case "cdouble": return TokenType.Cdouble;
|
||||||
case "default": return TokenType.Default;
|
case "default": return TokenType.Default;
|
||||||
case "dstring": return TokenType.DString;
|
case "dstring": return TokenType.DString;
|
||||||
|
@ -196,9 +194,7 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
|
||||||
case "__thread": return TokenType.Thread;
|
case "__thread": return TokenType.Thread;
|
||||||
case "__traits": return TokenType.Traits;
|
case "__traits": return TokenType.Traits;
|
||||||
case "volatile": return TokenType.Volatile;
|
case "volatile": return TokenType.Volatile;
|
||||||
case "@trusted": return TokenType.AtTrusted;
|
|
||||||
case "delegate": return TokenType.Delegate;
|
case "delegate": return TokenType.Delegate;
|
||||||
case "@disable": return TokenType.AtDisable;
|
|
||||||
case "function": return TokenType.Function;
|
case "function": return TokenType.Function;
|
||||||
case "unittest": return TokenType.Unittest;
|
case "unittest": return TokenType.Unittest;
|
||||||
case "__FILE__": return TokenType.File;
|
case "__FILE__": return TokenType.File;
|
||||||
|
@ -209,7 +205,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
|
||||||
switch (input)
|
switch (input)
|
||||||
{
|
{
|
||||||
case "__gshared": return TokenType.Gshared;
|
case "__gshared": return TokenType.Gshared;
|
||||||
case "@property": return TokenType.AtProperty;
|
|
||||||
case "immutable": return TokenType.Immutable;
|
case "immutable": return TokenType.Immutable;
|
||||||
case "interface": return TokenType.Interface;
|
case "interface": return TokenType.Interface;
|
||||||
case "invariant": return TokenType.Invariant;
|
case "invariant": return TokenType.Invariant;
|
||||||
|
@ -243,6 +238,7 @@ enum TokenType: uint
|
||||||
// Operators
|
// Operators
|
||||||
OPERATORS_BEGIN,
|
OPERATORS_BEGIN,
|
||||||
Assign, /// =
|
Assign, /// =
|
||||||
|
At, /// @
|
||||||
BitAnd, /// &
|
BitAnd, /// &
|
||||||
BitAndEquals, /// &=
|
BitAndEquals, /// &=
|
||||||
BitOr, /// |
|
BitOr, /// |
|
||||||
|
@ -433,14 +429,6 @@ enum TokenType: uint
|
||||||
Traits, /// __traits,
|
Traits, /// __traits,
|
||||||
CONSTANTS_END,
|
CONSTANTS_END,
|
||||||
|
|
||||||
// Properties
|
|
||||||
PROPERTIES_BEGIN,
|
|
||||||
AtProperty, /// @property
|
|
||||||
AtSafe, /// @safe
|
|
||||||
AtSystem, /// @system
|
|
||||||
AtTrusted, /// @trusted
|
|
||||||
PROPERTIES_END,
|
|
||||||
|
|
||||||
// Misc
|
// Misc
|
||||||
MISC_BEGIN,
|
MISC_BEGIN,
|
||||||
Blank, /// unknown token type
|
Blank, /// unknown token type
|
||||||
|
@ -505,7 +493,6 @@ static this()
|
||||||
"delegate" : TokenType.Delegate,
|
"delegate" : TokenType.Delegate,
|
||||||
"delete" : TokenType.Delete,
|
"delete" : TokenType.Delete,
|
||||||
"deprecated" : TokenType.Deprecated,
|
"deprecated" : TokenType.Deprecated,
|
||||||
"@disable" : TokenType.AtDisable,
|
|
||||||
"do" : TokenType.Do,
|
"do" : TokenType.Do,
|
||||||
"double" : TokenType.Double,
|
"double" : TokenType.Double,
|
||||||
"dstring" : TokenType.DString,
|
"dstring" : TokenType.DString,
|
||||||
|
@ -550,14 +537,12 @@ static this()
|
||||||
"package" : TokenType.Package,
|
"package" : TokenType.Package,
|
||||||
"pragma" : TokenType.Pragma,
|
"pragma" : TokenType.Pragma,
|
||||||
"private" : TokenType.Private,
|
"private" : TokenType.Private,
|
||||||
"@property" : TokenType.AtProperty,
|
|
||||||
"protected" : TokenType.Protected,
|
"protected" : TokenType.Protected,
|
||||||
"public" : TokenType.Public,
|
"public" : TokenType.Public,
|
||||||
"pure" : TokenType.Pure,
|
"pure" : TokenType.Pure,
|
||||||
"real" : TokenType.Real,
|
"real" : TokenType.Real,
|
||||||
"ref" : TokenType.Ref,
|
"ref" : TokenType.Ref,
|
||||||
"return" : TokenType.Return,
|
"return" : TokenType.Return,
|
||||||
"@safe" : TokenType.AtSafe,
|
|
||||||
"scope" : TokenType.Scope,
|
"scope" : TokenType.Scope,
|
||||||
"shared" : TokenType.Shared,
|
"shared" : TokenType.Shared,
|
||||||
"short" : TokenType.Short,
|
"short" : TokenType.Short,
|
||||||
|
@ -567,14 +552,12 @@ static this()
|
||||||
"super" : TokenType.Super,
|
"super" : TokenType.Super,
|
||||||
"switch" : TokenType.Switch,
|
"switch" : TokenType.Switch,
|
||||||
"synchronized" : TokenType.Synchronized,
|
"synchronized" : TokenType.Synchronized,
|
||||||
"@system" : TokenType.AtSystem,
|
|
||||||
"template" : TokenType.Template,
|
"template" : TokenType.Template,
|
||||||
"this" : TokenType.This,
|
"this" : TokenType.This,
|
||||||
"__thread" : TokenType.Thread,
|
"__thread" : TokenType.Thread,
|
||||||
"throw" : TokenType.Throw,
|
"throw" : TokenType.Throw,
|
||||||
"__traits" : TokenType.Traits,
|
"__traits" : TokenType.Traits,
|
||||||
"true" : TokenType.True,
|
"true" : TokenType.True,
|
||||||
"@trusted" : TokenType.AtTrusted,
|
|
||||||
"try" : TokenType.Try,
|
"try" : TokenType.Try,
|
||||||
"typedef" : TokenType.Typedef,
|
"typedef" : TokenType.Typedef,
|
||||||
"typeid" : TokenType.Typeid,
|
"typeid" : TokenType.Typeid,
|
||||||
|
|
484
tokenizer.d
484
tokenizer.d
|
@ -14,6 +14,7 @@ import std.uni;
|
||||||
import std.stdio;
|
import std.stdio;
|
||||||
import std.ascii;
|
import std.ascii;
|
||||||
import std.format;
|
import std.format;
|
||||||
|
import std.exception;
|
||||||
|
|
||||||
import langutils;
|
import langutils;
|
||||||
import codegen;
|
import codegen;
|
||||||
|
@ -29,9 +30,9 @@ pure bool isEoF(R)(R range)
|
||||||
return range.empty || range.front == 0 || range.front == 0x1a;
|
return range.empty || range.front == 0 || range.front == 0x1a;
|
||||||
}
|
}
|
||||||
|
|
||||||
char[] popNewline(R)(ref R range, ref uint index)
|
C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index) if (isSomeChar!C && isForwardRange!R)
|
||||||
{
|
{
|
||||||
char[] chars;
|
C[] chars;
|
||||||
if (range.front == '\r')
|
if (range.front == '\r')
|
||||||
{
|
{
|
||||||
chars ~= range.front;
|
chars ~= range.front;
|
||||||
|
@ -58,13 +59,14 @@ unittest
|
||||||
/**
|
/**
|
||||||
* Returns:
|
* Returns:
|
||||||
*/
|
*/
|
||||||
Token lexWhitespace(R)(ref R range, ref uint index, ref uint lineNumber)
|
Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index, ref uint lineNumber)
|
||||||
|
if (isForwardRange!R && isSomeChar!C)
|
||||||
{
|
{
|
||||||
Token t;
|
Token t;
|
||||||
t.type = TokenType.Whitespace;
|
t.type = TokenType.Whitespace;
|
||||||
t.lineNumber = lineNumber;
|
t.lineNumber = lineNumber;
|
||||||
t.startIndex = index;
|
t.startIndex = index;
|
||||||
auto app = appender!(char[])();
|
auto app = appender!(C[])();
|
||||||
while (!isEoF(range) && std.uni.isWhite(range.front))
|
while (!isEoF(range) && std.uni.isWhite(range.front))
|
||||||
{
|
{
|
||||||
if (isNewline(range))
|
if (isNewline(range))
|
||||||
|
@ -104,7 +106,8 @@ unittest
|
||||||
* lineNumber = the line number that corresponds to endIndex
|
* lineNumber = the line number that corresponds to endIndex
|
||||||
* Returns: The comment
|
* Returns: The comment
|
||||||
*/
|
*/
|
||||||
Token lexComment(R)(ref R input, ref uint index, ref uint lineNumber)
|
Token lexComment(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber)
|
||||||
|
if (isSomeChar!C && isForwardRange!R)
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
assert (input.front == '/');
|
assert (input.front == '/');
|
||||||
|
@ -115,7 +118,7 @@ body
|
||||||
t.lineNumber = lineNumber;
|
t.lineNumber = lineNumber;
|
||||||
t.type = TokenType.Comment;
|
t.type = TokenType.Comment;
|
||||||
t.startIndex = index;
|
t.startIndex = index;
|
||||||
auto app = appender!(char[])();
|
auto app = appender!(C[])();
|
||||||
app.put(input.front);
|
app.put(input.front);
|
||||||
input.popFront();
|
input.popFront();
|
||||||
switch(input.front)
|
switch(input.front)
|
||||||
|
@ -252,10 +255,10 @@ unittest
|
||||||
/**
|
/**
|
||||||
* Pops up to upTo hex chars from the input range and returns them as a string
|
* Pops up to upTo hex chars from the input range and returns them as a string
|
||||||
*/
|
*/
|
||||||
string popDigitChars(R, alias isInterestingDigit)(ref R input, ref uint index,
|
string popDigitChars(R, C = ElementType!R, alias isInterestingDigit)(ref R input, ref uint index,
|
||||||
uint upTo)
|
uint upTo) if (isSomeChar!C && isForwardRange!R)
|
||||||
{
|
{
|
||||||
auto app = appender!(char[])();
|
auto app = appender!(C[])();
|
||||||
for (uint i = 0; i != upTo; ++i)
|
for (uint i = 0; i != upTo; ++i)
|
||||||
{
|
{
|
||||||
if (isInterestingDigit(input.front))
|
if (isInterestingDigit(input.front))
|
||||||
|
@ -271,12 +274,12 @@ string popDigitChars(R, alias isInterestingDigit)(ref R input, ref uint index,
|
||||||
|
|
||||||
string popHexChars(R)(ref R input, ref uint index, uint upTo)
|
string popHexChars(R)(ref R input, ref uint index, uint upTo)
|
||||||
{
|
{
|
||||||
return popDigitChars!(R, isHexDigit)(input, index, upTo);
|
return popDigitChars!(R, ElementType!R, isHexDigit)(input, index, upTo);
|
||||||
}
|
}
|
||||||
|
|
||||||
string popOctalChars(R)(ref R input, ref uint index, uint upTo)
|
string popOctalChars(R)(ref R input, ref uint index, uint upTo)
|
||||||
{
|
{
|
||||||
return popDigitChars!(R, isOctalDigit)(input, index, upTo);
|
return popDigitChars!(R, ElementType!R, isOctalDigit)(input, index, upTo);
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
@ -297,7 +300,8 @@ unittest
|
||||||
assert (rc == "00123");
|
assert (rc == "00123");
|
||||||
}
|
}
|
||||||
|
|
||||||
string interpretEscapeSequence(R)(ref R input, ref uint index)
|
string interpretEscapeSequence(R, C = ElementType!R)(ref R input, ref uint index)
|
||||||
|
if (isSomeChar!C && isForwardRange!R)
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
assert(input.front == '\\');
|
assert(input.front == '\\');
|
||||||
|
@ -391,17 +395,8 @@ unittest
|
||||||
assert (interpretEscapeSequence(k, i) == v);
|
assert (interpretEscapeSequence(k, i) == v);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
Token lexString(R)(ref R input, ref uint index, ref uint lineNumber,
|
||||||
* Params:
|
const StringStyle style = StringStyle.Escaped)
|
||||||
* inputString = the source code to examine
|
|
||||||
* endIndex = an index into inputString at the opening quote
|
|
||||||
* lineNumber = the line number that corresponds to endIndex
|
|
||||||
* quote = the opening (and closing) quote character for the string to be
|
|
||||||
* lexed
|
|
||||||
* Returns: a string literal, including its opening and closing quote characters
|
|
||||||
*/
|
|
||||||
Token lexString(R)(ref R input, ref uint lineNumber, ref uint index,
|
|
||||||
bool canEscape = true)
|
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
assert (input.front == '\'' || input.front == '"' || input.front == '`');
|
assert (input.front == '\'' || input.front == '"' || input.front == '`');
|
||||||
|
@ -411,10 +406,13 @@ body
|
||||||
Token t;
|
Token t;
|
||||||
t.lineNumber = lineNumber;
|
t.lineNumber = lineNumber;
|
||||||
t.startIndex = index;
|
t.startIndex = index;
|
||||||
|
t.type = TokenType.StringLiteral;
|
||||||
auto quote = input.front;
|
auto quote = input.front;
|
||||||
input.popFront();
|
input.popFront();
|
||||||
++index;
|
++index;
|
||||||
auto app = appender!(char[])();
|
auto app = appender!(char[])();
|
||||||
|
if (style & StringStyle.IncludeQuotes)
|
||||||
|
app.put(quote);
|
||||||
while (!isEoF(input))
|
while (!isEoF(input))
|
||||||
{
|
{
|
||||||
if (isNewline(input))
|
if (isNewline(input))
|
||||||
|
@ -422,10 +420,12 @@ body
|
||||||
app.put(popNewline(input, index));
|
app.put(popNewline(input, index));
|
||||||
lineNumber++;
|
lineNumber++;
|
||||||
}
|
}
|
||||||
else if (input.front == '\\' && canEscape)
|
else if (input.front == '\\' && style & StringStyle.Escaped)
|
||||||
app.put(interpretEscapeSequence(input, index));
|
app.put(interpretEscapeSequence(input, index));
|
||||||
else if (input.front == quote)
|
else if (input.front == quote)
|
||||||
{
|
{
|
||||||
|
if (style & StringStyle.IncludeQuotes)
|
||||||
|
app.put(quote);
|
||||||
input.popFront();
|
input.popFront();
|
||||||
++index;
|
++index;
|
||||||
break;
|
break;
|
||||||
|
@ -443,20 +443,17 @@ body
|
||||||
{
|
{
|
||||||
case 'w':
|
case 'w':
|
||||||
t.type = TokenType.WStringLiteral;
|
t.type = TokenType.WStringLiteral;
|
||||||
input.popFront();
|
goto case 'c';
|
||||||
++index;
|
|
||||||
break;
|
|
||||||
case 'd':
|
case 'd':
|
||||||
t.type = TokenType.DStringLiteral;
|
t.type = TokenType.DStringLiteral;
|
||||||
|
goto case 'c';
|
||||||
|
case 'c':
|
||||||
|
if (style & StringStyle.IncludeQuotes)
|
||||||
|
app.put(input.front);
|
||||||
input.popFront();
|
input.popFront();
|
||||||
++index;
|
++index;
|
||||||
break;
|
break;
|
||||||
case 'c':
|
|
||||||
input.popFront();
|
|
||||||
++index;
|
|
||||||
goto default;
|
|
||||||
default:
|
default:
|
||||||
t.type = TokenType.StringLiteral;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -473,7 +470,7 @@ unittest
|
||||||
auto b = "\"ab\\ncd\"";
|
auto b = "\"ab\\ncd\"";
|
||||||
assert (lexString(b, i, l) == "ab\ncd");
|
assert (lexString(b, i, l) == "ab\ncd");
|
||||||
auto c = "`abc\\ndef`";
|
auto c = "`abc\\ndef`";
|
||||||
assert (lexString(c, i, l, false) == "abc\\ndef");
|
assert (lexString(c, i, l, StringStyle.NotEscaped) == "abc\\ndef");
|
||||||
auto d = `"12345"w`;
|
auto d = `"12345"w`;
|
||||||
assert (lexString(d, i, l).type == TokenType.WStringLiteral);
|
assert (lexString(d, i, l).type == TokenType.WStringLiteral);
|
||||||
auto e = `"abc"c`;
|
auto e = `"abc"c`;
|
||||||
|
@ -1091,32 +1088,214 @@ pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
|
||||||
enum IterationStyle
|
enum IterationStyle
|
||||||
{
|
{
|
||||||
/// Only include code, not whitespace or comments
|
/// Only include code, not whitespace or comments
|
||||||
CODE_ONLY,
|
CodeOnly = 0,
|
||||||
|
/// Includes comments
|
||||||
|
IncludeComments = 1,
|
||||||
|
/// Includes whitespace
|
||||||
|
IncludeWhitespace = 2 << 1,
|
||||||
/// Include everything
|
/// Include everything
|
||||||
EVERYTHING
|
Everything = IncludeComments | IncludeWhitespace
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TokenRange(R) if (isInputRange(R))
|
/**
|
||||||
|
* Configuration of the token lexing style
|
||||||
|
*/
|
||||||
|
enum StringStyle : uint
|
||||||
|
{
|
||||||
|
NotEscaped = 0,
|
||||||
|
/// String escape sequences will be processed and enclosing quote characters
|
||||||
|
/// will not be preserved.
|
||||||
|
Escaped = 1,
|
||||||
|
/// Strings will be read exactly as they appeared in the source, including
|
||||||
|
/// their opening and closing quote characters. Useful for syntax highlighting.
|
||||||
|
IncludeQuotes = 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenRange!(R) byToken(R)(ref R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
|
||||||
|
const StringStyle tokenStyle = StringStyle.Escaped) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
|
||||||
|
{
|
||||||
|
auto r = TokenRange!(R)(range);
|
||||||
|
r.tokenStyle = tokenStyle;
|
||||||
|
r.iterStyle = iterationStyle;
|
||||||
|
r.lineNumber = 1;
|
||||||
|
r.popFront();
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
|
||||||
{
|
{
|
||||||
this(ref R range)
|
this(ref R range)
|
||||||
{
|
{
|
||||||
this.range = range;
|
this.range = range;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool empty() const @property
|
bool empty() @property
|
||||||
{
|
{
|
||||||
return _empty;
|
return _empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
Token front() const @property
|
Token front() const @property
|
||||||
{
|
{
|
||||||
|
enforce(!_empty, "Cannot call popFront() on empty token range");
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
Token popFront()
|
Token popFront()
|
||||||
{
|
{
|
||||||
Token c = current;
|
if (range.isEoF())
|
||||||
|
{
|
||||||
|
_empty = true;
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
Token c = current;
|
||||||
|
current = Token.init;
|
||||||
|
current.lineNumber = lineNumber;
|
||||||
|
current.startIndex = index;
|
||||||
|
|
||||||
|
while (std.uni.isWhite(range.front))
|
||||||
|
{
|
||||||
|
if (iterStyle == IterationStyle.Everything)
|
||||||
|
{
|
||||||
|
current = lexWhitespace(range, index, lineNumber);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
lexWhitespace(range, index, lineNumber);
|
||||||
|
}
|
||||||
|
outer: switch (range.front)
|
||||||
|
{
|
||||||
|
mixin(generateCaseTrie(
|
||||||
|
"=", "TokenType.Assign",
|
||||||
|
"&", "TokenType.BitAnd",
|
||||||
|
"&=", "TokenType.BitAndEquals",
|
||||||
|
"|", "TokenType.BitOr",
|
||||||
|
"|=", "TokenType.BitOrEquals",
|
||||||
|
"~=", "TokenType.CatEquals",
|
||||||
|
":", "TokenType.Colon",
|
||||||
|
",", "TokenType.Comma",
|
||||||
|
"$", "TokenType.Dollar",
|
||||||
|
".", "TokenType.Dot",
|
||||||
|
"==", "TokenType.Equals",
|
||||||
|
"=>", "TokenType.GoesTo",
|
||||||
|
">", "TokenType.Greater",
|
||||||
|
">=", "TokenType.GreaterEqual",
|
||||||
|
"#", "TokenType.Hash",
|
||||||
|
"&&", "TokenType.LogicAnd",
|
||||||
|
"{", "TokenType.LBrace",
|
||||||
|
"[", "TokenType.LBracket",
|
||||||
|
"<", "TokenType.Less",
|
||||||
|
"<=", "TokenType.LessEqual",
|
||||||
|
"<>=", "TokenType.LessEqualGreater",
|
||||||
|
"<>", "TokenType.LessOrGreater",
|
||||||
|
"||", "TokenType.LogicOr",
|
||||||
|
"(", "TokenType.LParen",
|
||||||
|
"-", "TokenType.Minus",
|
||||||
|
"-=", "TokenType.MinusEquals",
|
||||||
|
"%", "TokenType.Mod",
|
||||||
|
"%=", "TokenType.ModEquals",
|
||||||
|
"*=", "TokenType.MulEquals",
|
||||||
|
"!", "TokenType.Not",
|
||||||
|
"!=", "TokenType.NotEquals",
|
||||||
|
"!>", "TokenType.NotGreater",
|
||||||
|
"!>=", "TokenType.NotGreaterEqual",
|
||||||
|
"!<", "TokenType.NotLess",
|
||||||
|
"!<=", "TokenType.NotLessEqual",
|
||||||
|
"!<>", "TokenType.NotLessEqualGreater",
|
||||||
|
"+", "TokenType.Plus",
|
||||||
|
"+=", "TokenType.PlusEquals",
|
||||||
|
"^^", "TokenType.Pow",
|
||||||
|
"^^=", "TokenType.PowEquals",
|
||||||
|
"}", "TokenType.RBrace",
|
||||||
|
"]", "TokenType.RBracket",
|
||||||
|
")", "TokenType.RParen",
|
||||||
|
";", "TokenType.Semicolon",
|
||||||
|
"<<", "TokenType.ShiftLeft",
|
||||||
|
"<<=", "TokenType.ShiftLeftEqual",
|
||||||
|
">>", "TokenType.ShiftRight",
|
||||||
|
">>=", "TokenType.ShiftRightEqual",
|
||||||
|
"..", "TokenType.Slice",
|
||||||
|
"*", "TokenType.Star",
|
||||||
|
"?", "TokenType.Ternary",
|
||||||
|
"~", "TokenType.Tilde",
|
||||||
|
"--", "TokenType.Decrement",
|
||||||
|
"!<>=", "TokenType.Unordered",
|
||||||
|
">>>", "TokenType.UnsignedShiftRight",
|
||||||
|
">>>=", "TokenType.UnsignedShiftRightEqual",
|
||||||
|
"++", "TokenType.Increment",
|
||||||
|
"...", "TokenType.Vararg",
|
||||||
|
"^", "TokenType.Xor",
|
||||||
|
"^=", "TokenType.XorEquals",
|
||||||
|
"@", "TokenType.At",
|
||||||
|
));
|
||||||
|
case '0': .. case '9':
|
||||||
|
current = lexNumber(range, index, lineNumber);
|
||||||
|
break;
|
||||||
|
case '\'':
|
||||||
|
case '"':
|
||||||
|
current = lexString(range, index, lineNumber);
|
||||||
|
break;
|
||||||
|
case '`':
|
||||||
|
current = lexString(range, index, lineNumber, StringStyle.NotEscaped);
|
||||||
|
break;
|
||||||
|
case 'q':
|
||||||
|
auto r = range.save;
|
||||||
|
r.popFront();
|
||||||
|
if (!r.isEoF() && r.front == '{')
|
||||||
|
writeln("ParseTokenString");
|
||||||
|
else
|
||||||
|
goto default;
|
||||||
|
case '/':
|
||||||
|
auto r = range.save();
|
||||||
|
r.popFront();
|
||||||
|
if (r.isEoF())
|
||||||
|
{
|
||||||
|
current.type = TokenType.Div;
|
||||||
|
current.value = "/";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (r.front)
|
||||||
|
{
|
||||||
|
case '/':
|
||||||
|
case '*':
|
||||||
|
case '+':
|
||||||
|
current = lexComment(range, index, lineNumber);
|
||||||
|
break outer;
|
||||||
|
case '=':
|
||||||
|
current.type = TokenType.DivEquals;
|
||||||
|
current.value = "/=";
|
||||||
|
break outer;
|
||||||
|
default:
|
||||||
|
current.type = TokenType.Div;
|
||||||
|
current.value = "/";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
auto r = range.save();
|
||||||
|
r.popFront();
|
||||||
|
if (!r.isEoF() && r.front == '"')
|
||||||
|
writeln("parse wysiwyg string");
|
||||||
|
else
|
||||||
|
goto default;
|
||||||
|
case 'x':
|
||||||
|
auto r = range.save();
|
||||||
|
r.popFront();
|
||||||
|
if (!r.isEoF() && r.front == '"')
|
||||||
|
writeln("parse hex string");
|
||||||
|
else
|
||||||
|
goto default;
|
||||||
|
default:
|
||||||
|
auto app = appender!(ElementType!(R)[])();
|
||||||
|
while(!range.isEoF() && !isSeparating(range.front))
|
||||||
|
{
|
||||||
|
app.put(range.front);
|
||||||
|
range.popFront();
|
||||||
|
}
|
||||||
|
current.value = to!string(app.data);
|
||||||
|
current.type = lookupTokenTypeOptimized(current.value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1126,226 +1305,13 @@ private:
|
||||||
uint index;
|
uint index;
|
||||||
R range;
|
R range;
|
||||||
bool _empty;
|
bool _empty;
|
||||||
|
IterationStyle iterStyle;
|
||||||
|
StringStyle tokenStyle;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyle.CODE_ONLY)
|
unittest
|
||||||
// if (isSomeString!S)
|
{
|
||||||
//{
|
auto c = ">><==>)(*)\"TestString\"if import ifire 0,10.4f `\n`@property void//comment\ntest/* comment *//+comment/+moar comment+/+/";
|
||||||
// auto tokenAppender = appender!(Token[])();
|
foreach (t; byToken(c))
|
||||||
//
|
writeln(t);
|
||||||
// // This is very likely a local maximum, but it does seem to take a few
|
}
|
||||||
// // milliseconds off of the run time
|
|
||||||
// tokenAppender.reserve(inputString.length / 4);
|
|
||||||
//
|
|
||||||
// size_t endIndex = 0;
|
|
||||||
// uint lineNumber = 1;
|
|
||||||
//
|
|
||||||
// if (inputString.length > 1 && inputString[0..2] == "#!")
|
|
||||||
// {
|
|
||||||
// Token currentToken;
|
|
||||||
// currentToken.lineNumber = lineNumber; // lineNumber is always 1
|
|
||||||
// currentToken.value = lexScriptLine(inputString, endIndex, lineNumber);
|
|
||||||
// currentToken.type = TokenType.ScriptLine;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// while (!isEoF(inputString, endIndex))
|
|
||||||
// {
|
|
||||||
// size_t prevIndex = endIndex;
|
|
||||||
// Token currentToken;
|
|
||||||
// auto startIndex = endIndex;
|
|
||||||
// if (isWhite(inputString[endIndex]))
|
|
||||||
// {
|
|
||||||
// if (iterationStyle == IterationStyle.EVERYTHING)
|
|
||||||
// {
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value = lexWhitespace(inputString, endIndex,
|
|
||||||
// lineNumber);
|
|
||||||
// currentToken.type = TokenType.Whitespace;
|
|
||||||
// tokenAppender.put(currentToken);
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// lexWhitespace(inputString, endIndex, lineNumber);
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
// currentToken.startIndex = endIndex;
|
|
||||||
//
|
|
||||||
// outerSwitch: switch(inputString[endIndex])
|
|
||||||
// {
|
|
||||||
// mixin(generateCaseTrie(
|
|
||||||
// "=", "TokenType.Assign",
|
|
||||||
// "&", "TokenType.BitAnd",
|
|
||||||
// "&=", "TokenType.BitAndEquals",
|
|
||||||
// "|", "TokenType.BitOr",
|
|
||||||
// "|=", "TokenType.BitOrEquals",
|
|
||||||
// "~=", "TokenType.CatEquals",
|
|
||||||
// ":", "TokenType.Colon",
|
|
||||||
// ",", "TokenType.Comma",
|
|
||||||
// "$", "TokenType.Dollar",
|
|
||||||
// ".", "TokenType.Dot",
|
|
||||||
// "==", "TokenType.Equals",
|
|
||||||
// "=>", "TokenType.GoesTo",
|
|
||||||
// ">", "TokenType.Greater",
|
|
||||||
// ">=", "TokenType.GreaterEqual",
|
|
||||||
// "#", "TokenType.Hash",
|
|
||||||
// "&&", "TokenType.LogicAnd",
|
|
||||||
// "{", "TokenType.LBrace",
|
|
||||||
// "[", "TokenType.LBracket",
|
|
||||||
// "<", "TokenType.Less",
|
|
||||||
// "<=", "TokenType.LessEqual",
|
|
||||||
// "<>=", "TokenType.LessEqualGreater",
|
|
||||||
// "<>", "TokenType.LessOrGreater",
|
|
||||||
// "||", "TokenType.LogicOr",
|
|
||||||
// "(", "TokenType.LParen",
|
|
||||||
// "-", "TokenType.Minus",
|
|
||||||
// "-=", "TokenType.MinusEquals",
|
|
||||||
// "%", "TokenType.Mod",
|
|
||||||
// "%=", "TokenType.ModEquals",
|
|
||||||
// "*=", "TokenType.MulEquals",
|
|
||||||
// "!", "TokenType.Not",
|
|
||||||
// "!=", "TokenType.NotEquals",
|
|
||||||
// "!>", "TokenType.NotGreater",
|
|
||||||
// "!>=", "TokenType.NotGreaterEqual",
|
|
||||||
// "!<", "TokenType.NotLess",
|
|
||||||
// "!<=", "TokenType.NotLessEqual",
|
|
||||||
// "!<>", "TokenType.NotLessEqualGreater",
|
|
||||||
// "+", "TokenType.Plus",
|
|
||||||
// "+=", "TokenType.PlusEquals",
|
|
||||||
// "^^", "TokenType.Pow",
|
|
||||||
// "^^=", "TokenType.PowEquals",
|
|
||||||
// "}", "TokenType.RBrace",
|
|
||||||
// "]", "TokenType.RBracket",
|
|
||||||
// ")", "TokenType.RParen",
|
|
||||||
// ";", "TokenType.Semicolon",
|
|
||||||
// "<<", "TokenType.ShiftLeft",
|
|
||||||
// "<<=", "TokenType.ShiftLeftEqual",
|
|
||||||
// ">>", "TokenType.ShiftRight",
|
|
||||||
// ">>=", "TokenType.ShiftRightEqual",
|
|
||||||
// "..", "TokenType.Slice",
|
|
||||||
// "*", "TokenType.Star",
|
|
||||||
// "?", "TokenType.Ternary",
|
|
||||||
// "~", "TokenType.Tilde",
|
|
||||||
// "--", "TokenType.Decrement",
|
|
||||||
// "!<>=", "TokenType.Unordered",
|
|
||||||
// ">>>", "TokenType.UnsignedShiftRight",
|
|
||||||
// ">>>=", "TokenType.UnsignedShiftRightEqual",
|
|
||||||
// "++", "TokenType.Increment",
|
|
||||||
// "...", "TokenType.Vararg",
|
|
||||||
// "^", "TokenType.Xor",
|
|
||||||
// "^=", "TokenType.XorEquals",
|
|
||||||
// ));
|
|
||||||
// case '0': .. case '9':
|
|
||||||
// currentToken = lexNumber(inputString, endIndex);
|
|
||||||
// break;
|
|
||||||
// case '/':
|
|
||||||
// ++endIndex;
|
|
||||||
// if (isEoF(inputString, endIndex))
|
|
||||||
// {
|
|
||||||
// currentToken.value = "/";
|
|
||||||
// currentToken.type = TokenType.Div;
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// switch (inputString[endIndex])
|
|
||||||
// {
|
|
||||||
// case '/':
|
|
||||||
// case '+':
|
|
||||||
// case '*':
|
|
||||||
// if (iterationStyle == IterationStyle.CODE_ONLY)
|
|
||||||
// {
|
|
||||||
// lexComment(inputString, endIndex, lineNumber);
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
// currentToken.value = lexComment(inputString, endIndex, lineNumber);
|
|
||||||
// currentToken.type = TokenType.Comment;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// case '=':
|
|
||||||
// currentToken.value = "/=";
|
|
||||||
// currentToken.type = TokenType.DivEquals;
|
|
||||||
// ++endIndex;
|
|
||||||
// break;
|
|
||||||
// default:
|
|
||||||
// currentToken.value = "/";
|
|
||||||
// currentToken.type = TokenType.Div;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// break;
|
|
||||||
// case 'r':
|
|
||||||
// ++endIndex;
|
|
||||||
// if (isEoF(inputString, endIndex) || inputString[endIndex] != '"')
|
|
||||||
// goto default;
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value = lexString(inputString, endIndex,
|
|
||||||
// lineNumber, inputString[endIndex], false);
|
|
||||||
// currentToken.type = TokenType.StringLiteral;
|
|
||||||
// break;
|
|
||||||
// case '`':
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value = lexString(inputString, endIndex, lineNumber,
|
|
||||||
// inputString[endIndex], false);
|
|
||||||
// currentToken.type = TokenType.StringLiteral;
|
|
||||||
// break;
|
|
||||||
// case 'x':
|
|
||||||
// ++endIndex;
|
|
||||||
// if (isEoF(inputString, endIndex) || inputString[endIndex] != '"')
|
|
||||||
// goto default;
|
|
||||||
// else
|
|
||||||
// goto case '"'; // BUG: this is incorrect! according to specification, hex data should be lexed differently than "normal" strings
|
|
||||||
// case '\'':
|
|
||||||
// case '"':
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value = lexString(inputString, endIndex, lineNumber,
|
|
||||||
// inputString[endIndex]);
|
|
||||||
// currentToken.type = TokenType.StringLiteral;
|
|
||||||
// break;
|
|
||||||
// case 'q':
|
|
||||||
// currentToken.value = "q";
|
|
||||||
// ++endIndex;
|
|
||||||
// if (!isEoF(inputString, endIndex))
|
|
||||||
// {
|
|
||||||
// switch (inputString[endIndex])
|
|
||||||
// {
|
|
||||||
// case '"':
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value ~= lexDelimitedString(inputString,
|
|
||||||
// endIndex, lineNumber);
|
|
||||||
// currentToken.type = TokenType.StringLiteral;
|
|
||||||
// break outerSwitch;
|
|
||||||
// case '{':
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// currentToken.value ~= lexTokenString(inputString,
|
|
||||||
// endIndex, lineNumber);
|
|
||||||
// currentToken.type = TokenType.StringLiteral;
|
|
||||||
// break outerSwitch;
|
|
||||||
// default:
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// goto default;
|
|
||||||
// case '@':
|
|
||||||
// ++endIndex;
|
|
||||||
// goto default;
|
|
||||||
// default:
|
|
||||||
// while(!isEoF(inputString, endIndex) && !isSeparating(inputString[endIndex]))
|
|
||||||
// ++endIndex;
|
|
||||||
// currentToken.value = inputString[startIndex .. endIndex];
|
|
||||||
// currentToken.type = lookupTokenTypeOptimized(currentToken.value);
|
|
||||||
// //currentToken.type = lookupTokenType(currentToken.value);
|
|
||||||
// currentToken.lineNumber = lineNumber;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// //stderr.writeln(currentToken);
|
|
||||||
// tokenAppender.put(currentToken);
|
|
||||||
//
|
|
||||||
// // This should never happen.
|
|
||||||
// if (endIndex <= prevIndex)
|
|
||||||
// {
|
|
||||||
// stderr.writeln("FAIL");
|
|
||||||
// return [];
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return tokenAppender.data;
|
|
||||||
//}
|
|
||||||
|
|
Loading…
Reference in New Issue