special token sequence
This commit is contained in:
parent
fbfdc37cf5
commit
bd97d1b393
378
std/d/lexer.d
378
std/d/lexer.d
|
@ -4,7 +4,7 @@
|
||||||
* This module contains a range-based lexer for the D programming language.
|
* This module contains a range-based lexer for the D programming language.
|
||||||
*
|
*
|
||||||
* Copyright: Brian Schott 2013
|
* Copyright: Brian Schott 2013
|
||||||
* License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
|
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
|
||||||
* Authors: Brian Schott
|
* Authors: Brian Schott
|
||||||
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
||||||
*/
|
*/
|
||||||
|
@ -78,9 +78,11 @@ enum IterationStyle
|
||||||
/// Only include code, not whitespace or comments
|
/// Only include code, not whitespace or comments
|
||||||
CodeOnly = 0,
|
CodeOnly = 0,
|
||||||
/// Includes comments
|
/// Includes comments
|
||||||
IncludeComments = 0b01,
|
IncludeComments = 0b0001,
|
||||||
/// Includes whitespace
|
/// Includes whitespace
|
||||||
IncludeWhitespace = 0b10,
|
IncludeWhitespace = 0b0010,
|
||||||
|
/// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
|
||||||
|
IncludeSpecialTokens = 0b0100,
|
||||||
/// Include everything
|
/// Include everything
|
||||||
Everything = IncludeComments | IncludeWhitespace
|
Everything = IncludeComments | IncludeWhitespace
|
||||||
}
|
}
|
||||||
|
@ -246,7 +248,6 @@ class TokenRange(R) : InputRange!(Token)
|
||||||
"=>", "TokenType.GoesTo",
|
"=>", "TokenType.GoesTo",
|
||||||
">", "TokenType.Greater",
|
">", "TokenType.Greater",
|
||||||
">=", "TokenType.GreaterEqual",
|
">=", "TokenType.GreaterEqual",
|
||||||
"#", "TokenType.Hash",
|
|
||||||
"&&", "TokenType.LogicAnd",
|
"&&", "TokenType.LogicAnd",
|
||||||
"{", "TokenType.LBrace",
|
"{", "TokenType.LBrace",
|
||||||
"[", "TokenType.LBracket",
|
"[", "TokenType.LBracket",
|
||||||
|
@ -337,6 +338,15 @@ class TokenRange(R) : InputRange!(Token)
|
||||||
case '*':
|
case '*':
|
||||||
case '+':
|
case '+':
|
||||||
current = lexComment(range, index, lineNumber);
|
current = lexComment(range, index, lineNumber);
|
||||||
|
if (!(iterStyle & IterationStyle.IncludeComments))
|
||||||
|
{
|
||||||
|
if (range.empty)
|
||||||
|
{
|
||||||
|
_empty = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
popFront();
|
||||||
|
}
|
||||||
break outer;
|
break outer;
|
||||||
case '=':
|
case '=':
|
||||||
current.type = TokenType.DivEquals;
|
current.type = TokenType.DivEquals;
|
||||||
|
@ -372,6 +382,31 @@ class TokenRange(R) : InputRange!(Token)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
goto default;
|
goto default;
|
||||||
|
case '#':
|
||||||
|
string special = lexSpecialTokenSequence(range, index, lineNumber);
|
||||||
|
if (special)
|
||||||
|
{
|
||||||
|
current.type = TokenType.SpecialTokenSequence;
|
||||||
|
current.value = special;
|
||||||
|
if (!(iterStyle & IterationStyle.IncludeSpecialTokens))
|
||||||
|
{
|
||||||
|
if (range.empty)
|
||||||
|
{
|
||||||
|
_empty = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
popFront();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
current.type = TokenType.Hash;
|
||||||
|
current.value = "#";
|
||||||
|
range.popFront();
|
||||||
|
++index;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
auto app = appender!(ElementType!(R)[])();
|
auto app = appender!(ElementType!(R)[])();
|
||||||
while(!range.isEoF() && !isSeparating(range.front))
|
while(!range.isEoF() && !isSeparating(range.front))
|
||||||
|
@ -396,6 +431,14 @@ private:
|
||||||
StringStyle stringStyle;
|
StringStyle stringStyle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
import std.stdio;
|
||||||
|
auto a = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
|
||||||
|
foreach (t; byToken(a))
|
||||||
|
writeln(t);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Listing of all the tokens in the D language.
|
* Listing of all the tokens in the D language.
|
||||||
*
|
*
|
||||||
|
@ -493,130 +536,129 @@ enum TokenType: uint
|
||||||
|
|
||||||
// Types
|
// Types
|
||||||
TYPES_BEGIN, ///
|
TYPES_BEGIN, ///
|
||||||
Bool, /// bool,
|
Bool, /// bool
|
||||||
Byte, /// byte,
|
Byte, /// byte
|
||||||
Cdouble, /// cdouble,
|
Cdouble, /// cdouble
|
||||||
Cent, /// cent,
|
Cent, /// cent
|
||||||
Cfloat, /// cfloat,
|
Cfloat, /// cfloat
|
||||||
Char, /// char,
|
Char, /// char
|
||||||
Creal, /// creal,
|
Creal, /// creal
|
||||||
Dchar, /// dchar,
|
Dchar, /// dchar
|
||||||
Double, /// double,
|
Double, /// double
|
||||||
DString, /// dstring
|
DString, /// dstring
|
||||||
Float, /// float,
|
Float, /// float
|
||||||
Function, /// function,
|
Function, /// function
|
||||||
Idouble, /// idouble,
|
Idouble, /// idouble
|
||||||
Ifloat, /// ifloat,
|
Ifloat, /// ifloat
|
||||||
Int, /// int,
|
Int, /// int
|
||||||
Ireal, /// ireal,
|
Ireal, /// ireal
|
||||||
Long, /// long,
|
Long, /// long
|
||||||
Real, /// real,
|
Real, /// real
|
||||||
Short, /// short,
|
Short, /// short
|
||||||
String, /// string
|
String, /// string
|
||||||
Ubyte, /// ubyte,
|
Ubyte, /// ubyte
|
||||||
Ucent, /// ucent,
|
Ucent, /// ucent
|
||||||
Uint, /// uint,
|
Uint, /// uint
|
||||||
Ulong, /// ulong,
|
Ulong, /// ulong
|
||||||
Ushort, /// ushort,
|
Ushort, /// ushort
|
||||||
Void, /// void,
|
Void, /// void
|
||||||
Wchar, /// wchar,
|
Wchar, /// wchar
|
||||||
WString, /// wstring
|
WString, /// wstring
|
||||||
TYPES_END, ///
|
TYPES_END, ///
|
||||||
|
|
||||||
Template, /// template,
|
Template, /// template
|
||||||
|
|
||||||
// Keywords
|
// Keywords
|
||||||
KEYWORDS_BEGIN, ///
|
KEYWORDS_BEGIN, ///
|
||||||
ATTRIBUTES_BEGIN, ///
|
ATTRIBUTES_BEGIN, ///
|
||||||
Align, /// align,
|
Align, /// align
|
||||||
Deprecated, /// deprecated,
|
Deprecated, /// deprecated
|
||||||
Extern, /// extern,
|
Extern, /// extern
|
||||||
Pragma, /// pragma,
|
Pragma, /// pragma
|
||||||
PROTECTION_BEGIN, ///
|
PROTECTION_BEGIN, ///
|
||||||
Export, /// export,
|
Export, /// export
|
||||||
Package, /// package,
|
Package, /// package
|
||||||
Private, /// private,
|
Private, /// private
|
||||||
Protected, /// protected,
|
Protected, /// protected
|
||||||
Public, /// public,
|
Public, /// public
|
||||||
PROTECTION_END, ///
|
PROTECTION_END, ///
|
||||||
Abstract, /// abstract,
|
Abstract, /// abstract
|
||||||
AtDisable, /// @disable
|
Auto, /// auto
|
||||||
Auto, /// auto,
|
Const, /// const
|
||||||
Const, /// const,
|
|
||||||
Final, /// final
|
Final, /// final
|
||||||
Gshared, /// __gshared,
|
Gshared, /// __gshared
|
||||||
Immutable, // immutable,
|
Immutable, // immutable
|
||||||
Inout, // inout,
|
Inout, // inout
|
||||||
Scope, /// scope,
|
Scope, /// scope
|
||||||
Shared, // shared,
|
Shared, // shared
|
||||||
Static, /// static,
|
Static, /// static
|
||||||
Synchronized, /// synchronized,
|
Synchronized, /// synchronized
|
||||||
ATTRIBUTES_END, ///
|
ATTRIBUTES_END, ///
|
||||||
Alias, /// alias,
|
Alias, /// alias
|
||||||
Asm, /// asm,
|
Asm, /// asm
|
||||||
Assert, /// assert,
|
Assert, /// assert
|
||||||
Body, /// body,
|
Body, /// body
|
||||||
Break, /// break,
|
Break, /// break
|
||||||
Case, /// case,
|
Case, /// case
|
||||||
Cast, /// cast,
|
Cast, /// cast
|
||||||
Catch, /// catch,
|
Catch, /// catch
|
||||||
Class, /// class,
|
Class, /// class
|
||||||
Continue, /// continue,
|
Continue, /// continue
|
||||||
Debug, /// debug,
|
Debug, /// debug
|
||||||
Default, /// default,
|
Default, /// default
|
||||||
Delegate, /// delegate,
|
Delegate, /// delegate
|
||||||
Delete, /// delete,
|
Delete, /// delete
|
||||||
Do, /// do,
|
Do, /// do
|
||||||
Else, /// else,
|
Else, /// else
|
||||||
Enum, /// enum,
|
Enum, /// enum
|
||||||
False, /// false,
|
False, /// false
|
||||||
Finally, /// finally,
|
Finally, /// finally
|
||||||
Foreach, /// foreach,
|
Foreach, /// foreach
|
||||||
Foreach_reverse, /// foreach_reverse,
|
Foreach_reverse, /// foreach_reverse
|
||||||
For, /// for,
|
For, /// for
|
||||||
Goto, /// goto,
|
Goto, /// goto
|
||||||
If, /// if ,
|
If, /// if
|
||||||
Import, /// import,
|
Import, /// import
|
||||||
In, /// in,
|
In, /// in
|
||||||
Interface, /// interface,
|
Interface, /// interface
|
||||||
Invariant, /// invariant,
|
Invariant, /// invariant
|
||||||
Is, /// is,
|
Is, /// is
|
||||||
Lazy, /// lazy,
|
Lazy, /// lazy
|
||||||
Macro, /// macro,
|
Macro, /// macro
|
||||||
Mixin, /// mixin,
|
Mixin, /// mixin
|
||||||
Module, /// module,
|
Module, /// module
|
||||||
New, /// new,
|
New, /// new
|
||||||
Nothrow, /// nothrow,
|
Nothrow, /// nothrow
|
||||||
Null, /// null,
|
Null, /// null
|
||||||
Out, /// out,
|
Out, /// out
|
||||||
Override, /// override,
|
Override, /// override
|
||||||
Pure, /// pure,
|
Pure, /// pure
|
||||||
Ref, /// ref,
|
Ref, /// ref
|
||||||
Return, /// return,
|
Return, /// return
|
||||||
Struct, /// struct,
|
Struct, /// struct
|
||||||
Super, /// super,
|
Super, /// super
|
||||||
Switch, /// switch ,
|
Switch, /// switch
|
||||||
This, /// this,
|
This, /// this
|
||||||
Throw, /// throw,
|
Throw, /// throw
|
||||||
True, /// true,
|
True, /// true
|
||||||
Try, /// try,
|
Try, /// try
|
||||||
Typedef, /// typedef,
|
Typedef, /// typedef
|
||||||
Typeid, /// typeid,
|
Typeid, /// typeid
|
||||||
Typeof, /// typeof,
|
Typeof, /// typeof
|
||||||
Union, /// union,
|
Union, /// union
|
||||||
Unittest, /// unittest,
|
Unittest, /// unittest
|
||||||
Version, /// version,
|
Version, /// version
|
||||||
Volatile, /// volatile,
|
Volatile, /// volatile
|
||||||
While, /// while ,
|
While, /// while
|
||||||
With, /// with,
|
With, /// with
|
||||||
KEYWORDS_END, ///
|
KEYWORDS_END, ///
|
||||||
|
|
||||||
// Constants
|
// Constants
|
||||||
CONSTANTS_BEGIN,
|
CONSTANTS_BEGIN, ///
|
||||||
File, /// __FILE__,
|
File, /// __FILE__
|
||||||
Line, /// __LINE__,
|
Line, /// __LINE__
|
||||||
Thread, /// __thread,
|
Thread, /// __thread
|
||||||
Traits, /// __traits,
|
Traits, /// __traits
|
||||||
CONSTANTS_END, ///
|
CONSTANTS_END, ///
|
||||||
|
|
||||||
// Misc
|
// Misc
|
||||||
|
@ -625,6 +667,7 @@ enum TokenType: uint
|
||||||
Identifier, /// anything else
|
Identifier, /// anything else
|
||||||
ScriptLine, // Line at the beginning of source file that starts from #!
|
ScriptLine, // Line at the beginning of source file that starts from #!
|
||||||
Whitespace, /// whitespace
|
Whitespace, /// whitespace
|
||||||
|
SpecialTokenSequence, /// #line 10 "file.d"
|
||||||
MISC_END, ///
|
MISC_END, ///
|
||||||
|
|
||||||
// Literals
|
// Literals
|
||||||
|
@ -1429,11 +1472,11 @@ body
|
||||||
int depth = 1;
|
int depth = 1;
|
||||||
while (!r.empty)
|
while (!r.empty)
|
||||||
{
|
{
|
||||||
if (r.front == TokenType.LBrace)
|
if (r.front.type == TokenType.LBrace)
|
||||||
{
|
{
|
||||||
++depth;
|
++depth;
|
||||||
}
|
}
|
||||||
else if (r.front == TokenType.RBrace)
|
else if (r.front.type == TokenType.RBrace)
|
||||||
{
|
{
|
||||||
--depth;
|
--depth;
|
||||||
if (depth <= 0)
|
if (depth <= 0)
|
||||||
|
@ -1479,7 +1522,7 @@ unittest
|
||||||
{
|
{
|
||||||
uint i;
|
uint i;
|
||||||
uint l;
|
uint l;
|
||||||
auto a = "q{import std.stdio;}";
|
auto a = "q{import std.stdio;} abcd";
|
||||||
auto ar = lexTokenString(a, i, l);
|
auto ar = lexTokenString(a, i, l);
|
||||||
assert (ar == TokenType.StringLiteral);
|
assert (ar == TokenType.StringLiteral);
|
||||||
assert (ar == "import std.stdio;");
|
assert (ar == "import std.stdio;");
|
||||||
|
@ -2071,6 +2114,109 @@ unittest
|
||||||
assert (pr == TokenType.DoubleLiteral);
|
assert (pr == TokenType.DoubleLiteral);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string lexSpecialTokenSequence(R)(ref R input, ref uint index,
|
||||||
|
ref uint lineNumber)
|
||||||
|
in
|
||||||
|
{
|
||||||
|
assert (input.front == '#');
|
||||||
|
}
|
||||||
|
body
|
||||||
|
{
|
||||||
|
auto i = index;
|
||||||
|
auto r = input.save;
|
||||||
|
auto l = lineNumber;
|
||||||
|
r.popFront();
|
||||||
|
++i;
|
||||||
|
auto app = appender!(ElementType!(R)[])();
|
||||||
|
app.put('#');
|
||||||
|
|
||||||
|
auto specialType = appender!(ElementType!(R)[])();
|
||||||
|
|
||||||
|
while (!r.empty && !isSeparating(r.front))
|
||||||
|
{
|
||||||
|
specialType.put(r.front);
|
||||||
|
++i;
|
||||||
|
r.popFront();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (to!string(specialType.data) != "line")
|
||||||
|
return null;
|
||||||
|
app.put(specialType.data);
|
||||||
|
|
||||||
|
if (std.uni.isWhite(r.front))
|
||||||
|
app.put(lexWhitespace(r, i, l).value);
|
||||||
|
|
||||||
|
|
||||||
|
if (!isDigit(r.front))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
auto t = lexNumber(r, i, l);
|
||||||
|
if (t != TokenType.IntLiteral)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
app.put(t.value);
|
||||||
|
l = to!uint(t.value);
|
||||||
|
|
||||||
|
if (!isNewline(r))
|
||||||
|
{
|
||||||
|
if (!r.empty && std.uni.isWhite(r.front))
|
||||||
|
app.put(lexWhitespace(r, i, l).value);
|
||||||
|
|
||||||
|
if (!r.empty && r.front == '"')
|
||||||
|
{
|
||||||
|
auto fSpecApp = appender!(ElementType!(R)[])();
|
||||||
|
fSpecApp.put(r.front);
|
||||||
|
r.popFront();
|
||||||
|
++i;
|
||||||
|
while (!r.empty)
|
||||||
|
{
|
||||||
|
if (r.front == '"')
|
||||||
|
{
|
||||||
|
fSpecApp.put('"');
|
||||||
|
++i;
|
||||||
|
r.popFront();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
fSpecApp.put(r.front);
|
||||||
|
r.popFront();
|
||||||
|
}
|
||||||
|
app.put(fSpecApp.data);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
app.put(popNewline(r, i));
|
||||||
|
input.popFrontN(i - index);
|
||||||
|
index = i;
|
||||||
|
lineNumber = l;
|
||||||
|
return to!string(app.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
uint i;
|
||||||
|
uint l;
|
||||||
|
auto a = "#line 10\n";
|
||||||
|
auto ar = lexSpecialTokenSequence(a, i, l);
|
||||||
|
assert (ar == "#line 10\n");
|
||||||
|
assert (a == "");
|
||||||
|
assert (l == 10);
|
||||||
|
|
||||||
|
auto b = "#line 9201 \"test.d\"\n";
|
||||||
|
auto br = lexSpecialTokenSequence(b, i, l);
|
||||||
|
assert (l == 9201);
|
||||||
|
assert (br == "#line 9201 \"test.d\"\n");
|
||||||
|
assert (b == "");
|
||||||
|
|
||||||
|
auto c = `#lin`;
|
||||||
|
auto cr = lexSpecialTokenSequence(c, i, l);
|
||||||
|
assert (l == 9201);
|
||||||
|
assert (cr is null);
|
||||||
|
assert (c == `#lin`);
|
||||||
|
}
|
||||||
|
|
||||||
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
|
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
|
||||||
{
|
{
|
||||||
switch (ch)
|
switch (ch)
|
||||||
|
@ -2364,3 +2510,5 @@ string generateCaseTrie(string[] args ...)
|
||||||
}
|
}
|
||||||
return printCaseStatements(t, "");
|
return printCaseStatements(t, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void main() {}
|
||||||
|
|
Loading…
Reference in New Issue