special token sequence

This commit is contained in:
Hackerpilot 2013-01-22 17:42:26 -08:00
parent fbfdc37cf5
commit bd97d1b393
1 changed files with 263 additions and 115 deletions

View File

@ -4,7 +4,7 @@
* This module contains a range-based lexer for the D programming language. * This module contains a range-based lexer for the D programming language.
* *
* Copyright: Brian Schott 2013 * Copyright: Brian Schott 2013
* License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott * Authors: Brian Schott
* Source: $(PHOBOSSRC std/d/_lexer.d) * Source: $(PHOBOSSRC std/d/_lexer.d)
*/ */
@ -78,9 +78,11 @@ enum IterationStyle
/// Only include code, not whitespace or comments /// Only include code, not whitespace or comments
CodeOnly = 0, CodeOnly = 0,
/// Includes comments /// Includes comments
IncludeComments = 0b01, IncludeComments = 0b0001,
/// Includes whitespace /// Includes whitespace
IncludeWhitespace = 0b10, IncludeWhitespace = 0b0010,
/// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
IncludeSpecialTokens = 0b0100,
/// Include everything /// Include everything
Everything = IncludeComments | IncludeWhitespace Everything = IncludeComments | IncludeWhitespace
} }
@ -246,7 +248,6 @@ class TokenRange(R) : InputRange!(Token)
"=>", "TokenType.GoesTo", "=>", "TokenType.GoesTo",
">", "TokenType.Greater", ">", "TokenType.Greater",
">=", "TokenType.GreaterEqual", ">=", "TokenType.GreaterEqual",
"#", "TokenType.Hash",
"&&", "TokenType.LogicAnd", "&&", "TokenType.LogicAnd",
"{", "TokenType.LBrace", "{", "TokenType.LBrace",
"[", "TokenType.LBracket", "[", "TokenType.LBracket",
@ -337,6 +338,15 @@ class TokenRange(R) : InputRange!(Token)
case '*': case '*':
case '+': case '+':
current = lexComment(range, index, lineNumber); current = lexComment(range, index, lineNumber);
if (!(iterStyle & IterationStyle.IncludeComments))
{
if (range.empty)
{
_empty = true;
return;
}
popFront();
}
break outer; break outer;
case '=': case '=':
current.type = TokenType.DivEquals; current.type = TokenType.DivEquals;
@ -372,6 +382,31 @@ class TokenRange(R) : InputRange!(Token)
} }
else else
goto default; goto default;
case '#':
string special = lexSpecialTokenSequence(range, index, lineNumber);
if (special)
{
current.type = TokenType.SpecialTokenSequence;
current.value = special;
if (!(iterStyle & IterationStyle.IncludeSpecialTokens))
{
if (range.empty)
{
_empty = true;
return;
}
popFront();
}
}
else
{
current.type = TokenType.Hash;
current.value = "#";
range.popFront();
++index;
break;
}
break;
default: default:
auto app = appender!(ElementType!(R)[])(); auto app = appender!(ElementType!(R)[])();
while(!range.isEoF() && !isSeparating(range.front)) while(!range.isEoF() && !isSeparating(range.front))
@ -396,6 +431,14 @@ private:
StringStyle stringStyle; StringStyle stringStyle;
} }
// Smoke test: lexes a short snippet containing a doc comment, a lone '#'
// followed by an identifier, a #line sequence, a declaration and a line
// comment, and prints every token produced by byToken. Nothing is asserted;
// the output is meant for manual inspection.
unittest
{
    import std.stdio;

    auto source = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
    auto tokens = byToken(source);
    foreach (token; tokens)
    {
        writeln(token);
    }
}
/** /**
* Listing of all the tokens in the D language. * Listing of all the tokens in the D language.
* *
@ -493,130 +536,129 @@ enum TokenType: uint
// Types // Types
TYPES_BEGIN, /// TYPES_BEGIN, ///
Bool, /// bool, Bool, /// bool
Byte, /// byte, Byte, /// byte
Cdouble, /// cdouble, Cdouble, /// cdouble
Cent, /// cent, Cent, /// cent
Cfloat, /// cfloat, Cfloat, /// cfloat
Char, /// char, Char, /// char
Creal, /// creal, Creal, /// creal
Dchar, /// dchar, Dchar, /// dchar
Double, /// double, Double, /// double
DString, /// dstring DString, /// dstring
Float, /// float, Float, /// float
Function, /// function, Function, /// function
Idouble, /// idouble, Idouble, /// idouble
Ifloat, /// ifloat, Ifloat, /// ifloat
Int, /// int, Int, /// int
Ireal, /// ireal, Ireal, /// ireal
Long, /// long, Long, /// long
Real, /// real, Real, /// real
Short, /// short, Short, /// short
String, /// string String, /// string
Ubyte, /// ubyte, Ubyte, /// ubyte
Ucent, /// ucent, Ucent, /// ucent
Uint, /// uint, Uint, /// uint
Ulong, /// ulong, Ulong, /// ulong
Ushort, /// ushort, Ushort, /// ushort
Void, /// void, Void, /// void
Wchar, /// wchar, Wchar, /// wchar
WString, /// wstring WString, /// wstring
TYPES_END, /// TYPES_END, ///
Template, /// template, Template, /// template
// Keywords // Keywords
KEYWORDS_BEGIN, /// KEYWORDS_BEGIN, ///
ATTRIBUTES_BEGIN, /// ATTRIBUTES_BEGIN, ///
Align, /// align, Align, /// align
Deprecated, /// deprecated, Deprecated, /// deprecated
Extern, /// extern, Extern, /// extern
Pragma, /// pragma, Pragma, /// pragma
PROTECTION_BEGIN, /// PROTECTION_BEGIN, ///
Export, /// export, Export, /// export
Package, /// package, Package, /// package
Private, /// private, Private, /// private
Protected, /// protected, Protected, /// protected
Public, /// public, Public, /// public
PROTECTION_END, /// PROTECTION_END, ///
Abstract, /// abstract, Abstract, /// abstract
AtDisable, /// @disable Auto, /// auto
Auto, /// auto, Const, /// const
Const, /// const,
Final, /// final Final, /// final
Gshared, /// __gshared, Gshared, /// __gshared
Immutable, // immutable, Immutable, // immutable
Inout, // inout, Inout, // inout
Scope, /// scope, Scope, /// scope
Shared, // shared, Shared, // shared
Static, /// static, Static, /// static
Synchronized, /// synchronized, Synchronized, /// synchronized
ATTRIBUTES_END, /// ATTRIBUTES_END, ///
Alias, /// alias, Alias, /// alias
Asm, /// asm, Asm, /// asm
Assert, /// assert, Assert, /// assert
Body, /// body, Body, /// body
Break, /// break, Break, /// break
Case, /// case, Case, /// case
Cast, /// cast, Cast, /// cast
Catch, /// catch, Catch, /// catch
Class, /// class, Class, /// class
Continue, /// continue, Continue, /// continue
Debug, /// debug, Debug, /// debug
Default, /// default, Default, /// default
Delegate, /// delegate, Delegate, /// delegate
Delete, /// delete, Delete, /// delete
Do, /// do, Do, /// do
Else, /// else, Else, /// else
Enum, /// enum, Enum, /// enum
False, /// false, False, /// false
Finally, /// finally, Finally, /// finally
Foreach, /// foreach, Foreach, /// foreach
Foreach_reverse, /// foreach_reverse, Foreach_reverse, /// foreach_reverse
For, /// for, For, /// for
Goto, /// goto, Goto, /// goto
If, /// if , If, /// if
Import, /// import, Import, /// import
In, /// in, In, /// in
Interface, /// interface, Interface, /// interface
Invariant, /// invariant, Invariant, /// invariant
Is, /// is, Is, /// is
Lazy, /// lazy, Lazy, /// lazy
Macro, /// macro, Macro, /// macro
Mixin, /// mixin, Mixin, /// mixin
Module, /// module, Module, /// module
New, /// new, New, /// new
Nothrow, /// nothrow, Nothrow, /// nothrow
Null, /// null, Null, /// null
Out, /// out, Out, /// out
Override, /// override, Override, /// override
Pure, /// pure, Pure, /// pure
Ref, /// ref, Ref, /// ref
Return, /// return, Return, /// return
Struct, /// struct, Struct, /// struct
Super, /// super, Super, /// super
Switch, /// switch , Switch, /// switch
This, /// this, This, /// this
Throw, /// throw, Throw, /// throw
True, /// true, True, /// true
Try, /// try, Try, /// try
Typedef, /// typedef, Typedef, /// typedef
Typeid, /// typeid, Typeid, /// typeid
Typeof, /// typeof, Typeof, /// typeof
Union, /// union, Union, /// union
Unittest, /// unittest, Unittest, /// unittest
Version, /// version, Version, /// version
Volatile, /// volatile, Volatile, /// volatile
While, /// while , While, /// while
With, /// with, With, /// with
KEYWORDS_END, /// KEYWORDS_END, ///
// Constants // Constants
CONSTANTS_BEGIN, CONSTANTS_BEGIN, ///
File, /// __FILE__, File, /// __FILE__
Line, /// __LINE__, Line, /// __LINE__
Thread, /// __thread, Thread, /// __thread
Traits, /// __traits, Traits, /// __traits
CONSTANTS_END, /// CONSTANTS_END, ///
// Misc // Misc
@ -625,6 +667,7 @@ enum TokenType: uint
Identifier, /// anything else Identifier, /// anything else
ScriptLine, // Line at the beginning of source file that starts from #! ScriptLine, // Line at the beginning of source file that starts from #!
Whitespace, /// whitespace Whitespace, /// whitespace
SpecialTokenSequence, /// #line 10 "file.d"
MISC_END, /// MISC_END, ///
// Literals // Literals
@ -1429,11 +1472,11 @@ body
int depth = 1; int depth = 1;
while (!r.empty) while (!r.empty)
{ {
if (r.front == TokenType.LBrace) if (r.front.type == TokenType.LBrace)
{ {
++depth; ++depth;
} }
else if (r.front == TokenType.RBrace) else if (r.front.type == TokenType.RBrace)
{ {
--depth; --depth;
if (depth <= 0) if (depth <= 0)
@ -1479,7 +1522,7 @@ unittest
{ {
uint i; uint i;
uint l; uint l;
auto a = "q{import std.stdio;}"; auto a = "q{import std.stdio;} abcd";
auto ar = lexTokenString(a, i, l); auto ar = lexTokenString(a, i, l);
assert (ar == TokenType.StringLiteral); assert (ar == TokenType.StringLiteral);
assert (ar == "import std.stdio;"); assert (ar == "import std.stdio;");
@ -2071,6 +2114,109 @@ unittest
assert (pr == TokenType.DoubleLiteral); assert (pr == TokenType.DoubleLiteral);
} }
/**
 * Attempts to lex a special token sequence of the form
 * ---
 * #line IntegerLiteral ["filespec"] EndOfLine
 * ---
 * starting at the front of input.
 *
 * All work is done on a saved copy of the range and on local copies of the
 * counters, so input, index and lineNumber are only modified when a complete
 * sequence was recognized.
 *
 * Params:
 *     input = the source range; its front must be '#'
 *     index = running position counter; on success it is advanced past the
 *         sequence
 *     lineNumber = running line counter; on success it is replaced by the
 *         local counter that was set from the integer following #line
 *         (NOTE(review): the whitespace lexer receives that counter by
 *         reference and may adjust it further - confirm)
 * Returns: the text of the sequence on success, null if the characters after
 *     '#' do not form a valid #line sequence
 */
string lexSpecialTokenSequence(R)(ref R input, ref uint index,
    ref uint lineNumber)
in
{
    assert (input.front == '#');
}
body
{
    auto i = index;
    auto r = input.save;
    auto l = lineNumber;
    r.popFront();
    ++i;
    auto app = appender!(ElementType!(R)[])();
    app.put('#');

    // Collect the word that directly follows '#'; only "line" is accepted.
    auto specialType = appender!(ElementType!(R)[])();
    while (!r.empty && !isSeparating(r.front))
    {
        specialType.put(r.front);
        ++i;
        r.popFront();
    }
    if (to!string(specialType.data) != "line")
        return null;
    app.put(specialType.data);

    // "#line" at the very end of the input has no line number: reject it
    // instead of reading the front of an empty range.
    if (r.empty)
        return null;
    if (std.uni.isWhite(r.front))
        app.put(lexWhitespace(r, i, l).value);

    // The whitespace may have been the last thing in the input.
    if (r.empty || !isDigit(r.front))
        return null;
    auto t = lexNumber(r, i, l);
    if (t != TokenType.IntLiteral)
        return null;
    app.put(t.value);
    // NOTE(review): to!uint throws if the literal text is not a plain decimal
    // number (e.g. hex or digit separators) - confirm what lexNumber reports
    // as IntLiteral.
    l = to!uint(t.value);

    // End of input counts as end of line, so only look for a filespec while
    // there is something left that is not a newline.
    if (!r.empty && !isNewline(r))
    {
        if (std.uni.isWhite(r.front))
            app.put(lexWhitespace(r, i, l).value);
        if (!r.empty && r.front == '"')
        {
            auto fSpecApp = appender!(ElementType!(R)[])();
            fSpecApp.put(r.front);
            r.popFront();
            ++i;
            bool closed = false;
            while (!r.empty)
            {
                if (r.front == '"')
                {
                    fSpecApp.put('"');
                    ++i;
                    r.popFront();
                    closed = true;
                    break;
                }
                ++i;
                fSpecApp.put(r.front);
                r.popFront();
            }
            // A filespec that runs to the end of the input without a closing
            // quote is not a valid special token sequence.
            if (!closed)
                return null;
            app.put(fSpecApp.data);
        }
        else
            return null;
    }

    // Nothing to pop when the sequence is terminated by the end of the input.
    if (!r.empty)
        app.put(popNewline(r, i));

    // Commit: consume from the real input exactly what the copy consumed.
    input.popFrontN(i - index);
    index = i;
    lineNumber = l;
    return to!string(app.data);
}
// Exercises lexSpecialTokenSequence with a bare #line, a #line carrying a
// filespec, and a '#' that is not followed by "line". The index and line
// counters are shared by all three calls.
unittest
{
    uint idx;
    uint line;

    // Line number only: the whole input is consumed and the line counter set.
    auto numberOnly = "#line 10\n";
    auto numberOnlyResult = lexSpecialTokenSequence(numberOnly, idx, line);
    assert (line == 10);
    assert (numberOnly == "");
    assert (numberOnlyResult == "#line 10\n");

    // Line number followed by a quoted file name.
    auto withFile = "#line 9201 \"test.d\"\n";
    auto withFileResult = lexSpecialTokenSequence(withFile, idx, line);
    assert (withFileResult == "#line 9201 \"test.d\"\n");
    assert (withFile == "");
    assert (line == 9201);

    // Not a #line sequence: null is returned and nothing is modified.
    auto notLine = `#lin`;
    auto notLineResult = lexSpecialTokenSequence(notLine, idx, line);
    assert (notLineResult is null);
    assert (notLine == `#lin`);
    assert (line == 9201);
}
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C) pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
{ {
switch (ch) switch (ch)
@ -2364,3 +2510,5 @@ string generateCaseTrie(string[] args ...)
} }
return printCaseStatements(t, ""); return printCaseStatements(t, "");
} }
// Program entry point; intentionally does nothing.
// NOTE(review): presumably present so the module can be built as a standalone
// executable (e.g. to run its unittests) - confirm before shipping.
void main()
{
}