special token sequence

2013-01-22 17:42:26 -08:00 · 2013-01-22 17:42:26 -08:00 · bd97d1b393
parent fbfdc37cf5
commit bd97d1b393
1 changed files with 263 additions and 115 deletions
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -4,7 +4,7 @@
 * This module contains a range-based lexer for the D programming language.
 *
 * Copyright: Brian Schott 2013
- * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
+ * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Brian Schott
 * Source: $(PHOBOSSRC std/d/_lexer.d)
 */
@ -78,9 +78,11 @@ enum IterationStyle
 	/// Only include code, not whitespace or comments
 	CodeOnly = 0,
 	/// Includes comments
-	IncludeComments = 0b01,
+	IncludeComments = 0b0001,
 	/// Includes whitespace
-	IncludeWhitespace = 0b10,
+	IncludeWhitespace = 0b0010,
    /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
    IncludeSpecialTokens = 0b0100,
 	/// Include everything
 	Everything = IncludeComments | IncludeWhitespace
 }
@ -246,7 +248,6 @@ class TokenRange(R) : InputRange!(Token)
 			"=>",   "TokenType.GoesTo",
 			">",    "TokenType.Greater",
 			">=",   "TokenType.GreaterEqual",
 			"#",    "TokenType.Hash",
 			"&&",   "TokenType.LogicAnd",
 			"{",    "TokenType.LBrace",
 			"[",    "TokenType.LBracket",
@ -337,6 +338,15 @@ class TokenRange(R) : InputRange!(Token)
 			case '*':
 			case '+':
 				current = lexComment(range, index, lineNumber);
                if (!(iterStyle & IterationStyle.IncludeComments))
                {
                    if (range.empty)
                    {
                        _empty = true;
                        return;
                    }
                    popFront();
                }
 				break outer;
 			case '=':
 				current.type = TokenType.DivEquals;
@ -372,6 +382,31 @@ class TokenRange(R) : InputRange!(Token)
 			}
 			else
 				goto default;
        case '#':
            string special = lexSpecialTokenSequence(range, index, lineNumber);
            if (special)
            {
                current.type = TokenType.SpecialTokenSequence;
                current.value = special;
                if (!(iterStyle & IterationStyle.IncludeSpecialTokens))
                {
                    if (range.empty)
                    {
                        _empty = true;
                        return;
                    }
                    popFront();
                }
            }
            else
            {
                current.type = TokenType.Hash;
                current.value = "#";
                range.popFront();
 				++index;
 				break;
            }
            break;
 		default:
 			auto app = appender!(ElementType!(R)[])();
 			while(!range.isEoF() && !isSeparating(range.front))
@ -396,6 +431,14 @@ private:
 	StringStyle stringStyle;
 }
 unittest
 {
    import std.stdio;
    // Smoke test: lex a snippet that contains a doc comment, a bare "#lin",
    // a "#line" sequence and a trailing line comment, printing each token.
    auto source = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
    foreach (token; byToken(source))
    {
        writeln(token);
    }
 }
 /**
 * Listing of all the tokens in the D language.
 *
@ -493,130 +536,129 @@ enum TokenType: uint
 	// Types
 	TYPES_BEGIN, ///
-	Bool, /// bool,
+	Bool, /// bool
-	Byte, /// byte,
+	Byte, /// byte
-	Cdouble, /// cdouble,
+	Cdouble, /// cdouble
-	Cent, /// cent,
+	Cent, /// cent
-	Cfloat, /// cfloat,
+	Cfloat, /// cfloat
-	Char, /// char,
+	Char, /// char
-	Creal, /// creal,
+	Creal, /// creal
-	Dchar, /// dchar,
+	Dchar, /// dchar
-	Double, /// double,
+	Double, /// double
 	DString, /// dstring
-	Float, /// float,
+	Float, /// float
-	Function, /// function,
+	Function, /// function
-	Idouble, /// idouble,
+	Idouble, /// idouble
-	Ifloat, /// ifloat,
+	Ifloat, /// ifloat
-	Int, /// int,
+	Int, /// int
-	Ireal, /// ireal,
+	Ireal, /// ireal
-	Long, /// long,
+	Long, /// long
-	Real, /// real,
+	Real, /// real
-	Short, /// short,
+	Short, /// short
 	String, /// string
-	Ubyte, /// ubyte,
+	Ubyte, /// ubyte
-	Ucent, /// ucent,
+	Ucent, /// ucent
-	Uint, /// uint,
+	Uint, /// uint
-	Ulong, /// ulong,
+	Ulong, /// ulong
-	Ushort, /// ushort,
+	Ushort, /// ushort
-	Void, /// void,
+	Void, /// void
-	Wchar, /// wchar,
+	Wchar, /// wchar
 	WString, /// wstring
 	TYPES_END, ///
-	Template, /// template,
+	Template, /// template
 	// Keywords
 	KEYWORDS_BEGIN, ///
 		ATTRIBUTES_BEGIN, ///
-		Align, /// align,
+		Align, /// align
-		Deprecated, /// deprecated,
+		Deprecated, /// deprecated
-		Extern, /// extern,
+		Extern, /// extern
-		Pragma, /// pragma,
+		Pragma, /// pragma
 			PROTECTION_BEGIN, ///
-			Export, /// export,
+			Export, /// export
-			Package, /// package,
+			Package, /// package
-			Private, /// private,
+			Private, /// private
-			Protected, /// protected,
+			Protected, /// protected
-			Public, /// public,
+			Public, /// public
 			PROTECTION_END, ///
-		Abstract, /// abstract,
+		Abstract, /// abstract
-		AtDisable, /// @disable
+		Auto, /// auto
-		Auto, /// auto,
+		Const, /// const
 		Const, /// const,
 		Final, /// final
-		Gshared, /// __gshared,
+		Gshared, /// __gshared
-		Immutable, // immutable,
+		Immutable, // immutable
-		Inout, // inout,
+		Inout, // inout
-		Scope, /// scope,
+		Scope, /// scope
-		Shared, // shared,
+		Shared, // shared
-		Static, /// static,
+		Static, /// static
-		Synchronized, /// synchronized,
+		Synchronized, /// synchronized
 		ATTRIBUTES_END, ///
-	Alias, /// alias,
+	Alias, /// alias
-	Asm, /// asm,
+	Asm, /// asm
-	Assert, /// assert,
+	Assert, /// assert
-	Body, /// body,
+	Body, /// body
-	Break, /// break,
+	Break, /// break
-	Case, /// case,
+	Case, /// case
-	Cast, /// cast,
+	Cast, /// cast
-	Catch, /// catch,
+	Catch, /// catch
-	Class, /// class,
+	Class, /// class
-	Continue, /// continue,
+	Continue, /// continue
-	Debug, /// debug,
+	Debug, /// debug
-	Default, /// default,
+	Default, /// default
-	Delegate, /// delegate,
+	Delegate, /// delegate
-	Delete, /// delete,
+	Delete, /// delete
-	Do, /// do,
+	Do, /// do
-	Else, /// else,
+	Else, /// else
-	Enum, /// enum,
+	Enum, /// enum
-	False, /// false,
+	False, /// false
-	Finally, /// finally,
+	Finally, /// finally
-	Foreach, /// foreach,
+	Foreach, /// foreach
-	Foreach_reverse, /// foreach_reverse,
+	Foreach_reverse, /// foreach_reverse
-	For, /// for,
+	For, /// for
-	Goto, /// goto,
+	Goto, /// goto
-	If, /// if ,
+	If, /// if
-	Import, /// import,
+	Import, /// import
-	In, /// in,
+	In, /// in
-	Interface, /// interface,
+	Interface, /// interface
-	Invariant, /// invariant,
+	Invariant, /// invariant
-	Is, /// is,
+	Is, /// is
-	Lazy, /// lazy,
+	Lazy, /// lazy
-	Macro, /// macro,
+	Macro, /// macro
-	Mixin, /// mixin,
+	Mixin, /// mixin
-	Module, /// module,
+	Module, /// module
-	New, /// new,
+	New, /// new
-	Nothrow, /// nothrow,
+	Nothrow, /// nothrow
-	Null, /// null,
+	Null, /// null
-	Out, /// out,
+	Out, /// out
-	Override, /// override,
+	Override, /// override
-	Pure, /// pure,
+	Pure, /// pure
-	Ref, /// ref,
+	Ref, /// ref
-	Return, /// return,
+	Return, /// return
-	Struct, /// struct,
+	Struct, /// struct
-	Super, /// super,
+	Super, /// super
-	Switch, /// switch ,
+	Switch, /// switch
-	This, /// this,
+	This, /// this
-	Throw, /// throw,
+	Throw, /// throw
-	True, /// true,
+	True, /// true
-	Try, /// try,
+	Try, /// try
-	Typedef, /// typedef,
+	Typedef, /// typedef
-	Typeid, /// typeid,
+	Typeid, /// typeid
-	Typeof, /// typeof,
+	Typeof, /// typeof
-	Union, /// union,
+	Union, /// union
-	Unittest, /// unittest,
+	Unittest, /// unittest
-	Version, /// version,
+	Version, /// version
-	Volatile, /// volatile,
+	Volatile, /// volatile
-	While, /// while ,
+	While, /// while
-	With, /// with,
+	With, /// with
 	KEYWORDS_END, ///
 	// Constants
-	CONSTANTS_BEGIN,
+	CONSTANTS_BEGIN, ///
-	File, /// __FILE__,
+	File, /// __FILE__
-	Line, /// __LINE__,
+	Line, /// __LINE__
-	Thread, /// __thread,
+	Thread, /// __thread
-	Traits, /// __traits,
+	Traits, /// __traits
 	CONSTANTS_END, ///
 	// Misc
@ -625,6 +667,7 @@ enum TokenType: uint
 	Identifier, /// anything else
 	ScriptLine, // Line at the beginning of source file that starts from #!
 	Whitespace, /// whitespace
    SpecialTokenSequence, /// #line 10 "file.d"
 	MISC_END, ///
 	// Literals
@ -1429,11 +1472,11 @@ body
 	int depth = 1;
 	while (!r.empty)
 	{
-		if (r.front == TokenType.LBrace)
+		if (r.front.type == TokenType.LBrace)
 		{
 			++depth;
 		}
-		else if (r.front == TokenType.RBrace)
+		else if (r.front.type == TokenType.RBrace)
 		{
 			--depth;
 			if (depth <= 0)
@ -1479,7 +1522,7 @@ unittest
 {
 	uint i;
 	uint l;
-	auto a = "q{import std.stdio;}";
+	auto a = "q{import std.stdio;} abcd";
 	auto ar = lexTokenString(a, i, l);
 	assert (ar == TokenType.StringLiteral);
 	assert (ar == "import std.stdio;");
@ -2071,6 +2114,109 @@ unittest
 	assert (pr == TokenType.DoubleLiteral);
 }
 /**
  * Attempts to lex a $(LINK2 http://dlang.org/lex.html#specialtokens, special
  * token sequence) of the form: #line IntegerLiteral ["Filespec"] EndOfLine
  *
  * All lexing is done on a saved copy of the input, so input, index and
  * lineNumber are only modified when a complete sequence was recognized.
  *
  * Params:
  *     input = the source range; its front must be '#'
  *     index = the current position; advanced past the sequence on success
  *     lineNumber = on success, updated from the integer literal found in
  *         the sequence
  * Returns: the text of the sequence (including any line terminator that was
  *     consumed), or null if the input does not start with a well-formed
  *     #line sequence
  */
 string lexSpecialTokenSequence(R)(ref R input, ref uint index,
    ref uint lineNumber)
 in
 {
    assert (input.front == '#');
 }
 body
 {
    // Work on copies so that a failed match leaves the caller's state alone.
    auto i = index;
    auto r = input.save;
    auto l = lineNumber;
    r.popFront();
    ++i;
    auto app = appender!(ElementType!(R)[])();
    app.put('#');

    // Collect the directive name that follows the '#'.
    auto specialType = appender!(ElementType!(R)[])();
    while (!r.empty && !isSeparating(r.front))
    {
        specialType.put(r.front);
        ++i;
        r.popFront();
    }
    if (to!string(specialType.data) != "line")
        return null;
    app.put(specialType.data);

    // "#line" at the very end of the input has no line number; bail out
    // instead of reading the front of an empty range.
    if (r.empty)
        return null;
    if (std.uni.isWhite(r.front))
        app.put(lexWhitespace(r, i, l).value);
    if (r.empty || !isDigit(r.front))
        return null;

    auto t = lexNumber(r, i, l);
    if (t != TokenType.IntLiteral)
        return null;
    app.put(t.value);
    l = to!uint(t.value);

    // End of input directly after the number is treated as end of line.
    // isNewline is not visible here, so it is never handed an empty range.
    if (!r.empty && !isNewline(r))
    {
        // NOTE(review): if lexWhitespace also consumes line terminators, a
        // sequence with trailing blanks before the newline is rejected
        // below -- confirm against lexWhitespace.
        if (std.uni.isWhite(r.front))
            app.put(lexWhitespace(r, i, l).value);
        if (r.empty || r.front != '"')
            return null;

        // Filespec: copy everything up to and including the closing quote.
        auto fSpecApp = appender!(ElementType!(R)[])();
        fSpecApp.put(r.front);
        r.popFront();
        ++i;
        bool terminated = false;
        while (!r.empty)
        {
            auto c = r.front;
            fSpecApp.put(c);
            ++i;
            r.popFront();
            if (c == '"')
            {
                terminated = true;
                break;
            }
        }
        // A filespec without its closing quote is not a valid sequence.
        if (!terminated)
            return null;
        app.put(fSpecApp.data);
    }

    // Only consume a line terminator when there is input left to look at.
    if (!r.empty)
        app.put(popNewline(r, i));

    // Success: commit the consumed characters and the new line number.
    input.popFrontN(i - index);
    index = i;
    lineNumber = l;
    return to!string(app.data);
 }
 unittest
 {
    // index and line are deliberately shared between the three cases below.
    uint index;
    uint line;

    // A sequence consisting of a line number only.
    auto numberOnly = "#line 10\n";
    auto numberOnlyResult = lexSpecialTokenSequence(numberOnly, index, line);
    assert (numberOnlyResult == "#line 10\n");
    assert (numberOnly.length == 0);
    assert (line == 10);

    // A sequence with both a line number and a filespec.
    auto withFile = "#line 9201 \"test.d\"\n";
    auto withFileResult = lexSpecialTokenSequence(withFile, index, line);
    assert (line == 9201);
    assert (withFileResult == "#line 9201 \"test.d\"\n");
    assert (withFile.length == 0);

    // Something that is not a #line sequence: nothing is consumed and the
    // line number is left untouched.
    auto notSpecial = `#lin`;
    auto notSpecialResult = lexSpecialTokenSequence(notSpecial, index, line);
    assert (line == 9201);
    assert (notSpecialResult is null);
    assert (notSpecial == `#lin`);
 }
 pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
 {
 	switch (ch)
@ -2364,3 +2510,5 @@ string generateCaseTrie(string[] args ...)
 	}
 	return printCaseStatements(t, "");
 }
 // Empty program entry point.
 // NOTE(review): presumably only here so the module can be built on its own
 // to run the unittests -- confirm before merging into a library build.
 void main() {}