special token sequence

2013-01-22 17:42:26 -08:00 · 2013-01-22 17:42:26 -08:00 · bd97d1b393
parent fbfdc37cf5
commit bd97d1b393
1 changed files with 263 additions and 115 deletions
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -4,7 +4,7 @@
 * This module contains a range-based lexer for the D programming language.
 *
 * Copyright: Brian Schott 2013
- * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
+ * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Brian Schott
 * Source: $(PHOBOSSRC std/d/_lexer.d)
 */
@ -78,9 +78,11 @@ enum IterationStyle
 	/// Only include code, not whitespace or comments
 	CodeOnly = 0,
 	/// Includes comments
-	IncludeComments = 0b01,
+	IncludeComments = 0b0001,
 	/// Includes whitespace
-	IncludeWhitespace = 0b10,
+	IncludeWhitespace = 0b0010,
+    /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
+    IncludeSpecialTokens = 0b0100,
 	/// Include everything
 	Everything = IncludeComments | IncludeWhitespace
 }
@ -246,7 +248,6 @@ class TokenRange(R) : InputRange!(Token)
 			"=>",   "TokenType.GoesTo",
 			">",    "TokenType.Greater",
 			">=",   "TokenType.GreaterEqual",
-			"#",    "TokenType.Hash",
 			"&&",   "TokenType.LogicAnd",
 			"{",    "TokenType.LBrace",
 			"[",    "TokenType.LBracket",
@ -337,6 +338,15 @@ class TokenRange(R) : InputRange!(Token)
 			case '*':
 			case '+':
 				current = lexComment(range, index, lineNumber);
+                if (!(iterStyle & IterationStyle.IncludeComments))
+                {
+                    if (range.empty)
+                    {
+                        _empty = true;
+                        return;
+                    }
+                    popFront();
+                }
 				break outer;
 			case '=':
 				current.type = TokenType.DivEquals;
@ -372,6 +382,31 @@ class TokenRange(R) : InputRange!(Token)
 			}
 			else
 				goto default;
+        case '#':
+            string special = lexSpecialTokenSequence(range, index, lineNumber);
+            if (special)
+            {
+                current.type = TokenType.SpecialTokenSequence;
+                current.value = special;
+                if (!(iterStyle & IterationStyle.IncludeSpecialTokens))
+                {
+                    if (range.empty)
+                    {
+                        _empty = true;
+                        return;
+                    }
+                    popFront();
+                }
+            }
+            else
+            {
+                current.type = TokenType.Hash;
+                current.value = "#";
+                range.popFront();
+				++index;
+				break;
+            }
+            break;
 		default:
 			auto app = appender!(ElementType!(R)[])();
 			while(!range.isEoF() && !isSeparating(range.front))
@ -396,6 +431,14 @@ private:
 	StringStyle stringStyle;
 }

+unittest
+{
+    // Smoke test only: writes each token yielded by byToken for a small
+    // input mixing a doc comment, a malformed "#lin", a #line directive and
+    // ordinary code. Nothing is asserted.
+    import std.stdio : writeln;
+    auto source = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
+    foreach (token; byToken(source))
+        writeln(token);
+}
+
 /**
 * Listing of all the tokens in the D language.
 *
@ -493,130 +536,129 @@ enum TokenType: uint

 	// Types
 	TYPES_BEGIN, ///
-	Bool, /// bool,
-	Byte, /// byte,
-	Cdouble, /// cdouble,
-	Cent, /// cent,
-	Cfloat, /// cfloat,
-	Char, /// char,
-	Creal, /// creal,
-	Dchar, /// dchar,
-	Double, /// double,
+	Bool, /// bool
+	Byte, /// byte
+	Cdouble, /// cdouble
+	Cent, /// cent
+	Cfloat, /// cfloat
+	Char, /// char
+	Creal, /// creal
+	Dchar, /// dchar
+	Double, /// double
 	DString, /// dstring
-	Float, /// float,
-	Function, /// function,
-	Idouble, /// idouble,
-	Ifloat, /// ifloat,
-	Int, /// int,
-	Ireal, /// ireal,
-	Long, /// long,
-	Real, /// real,
-	Short, /// short,
+	Float, /// float
+	Function, /// function
+	Idouble, /// idouble
+	Ifloat, /// ifloat
+	Int, /// int
+	Ireal, /// ireal
+	Long, /// long
+	Real, /// real
+	Short, /// short
 	String, /// string
-	Ubyte, /// ubyte,
-	Ucent, /// ucent,
-	Uint, /// uint,
-	Ulong, /// ulong,
-	Ushort, /// ushort,
-	Void, /// void,
-	Wchar, /// wchar,
+	Ubyte, /// ubyte
+	Ucent, /// ucent
+	Uint, /// uint
+	Ulong, /// ulong
+	Ushort, /// ushort
+	Void, /// void
+	Wchar, /// wchar
 	WString, /// wstring
 	TYPES_END, ///

-	Template, /// template,
+	Template, /// template

 	// Keywords
 	KEYWORDS_BEGIN, ///
 		ATTRIBUTES_BEGIN, ///
-		Align, /// align,
-		Deprecated, /// deprecated,
-		Extern, /// extern,
-		Pragma, /// pragma,
+		Align, /// align
+		Deprecated, /// deprecated
+		Extern, /// extern
+		Pragma, /// pragma
 			PROTECTION_BEGIN, ///
-			Export, /// export,
-			Package, /// package,
-			Private, /// private,
-			Protected, /// protected,
-			Public, /// public,
+			Export, /// export
+			Package, /// package
+			Private, /// private
+			Protected, /// protected
+			Public, /// public
 			PROTECTION_END, ///
-		Abstract, /// abstract,
-		AtDisable, /// @disable
-		Auto, /// auto,
-		Const, /// const,
+		Abstract, /// abstract
+		Auto, /// auto
+		Const, /// const
 		Final, /// final
-		Gshared, /// __gshared,
-		Immutable, // immutable,
-		Inout, // inout,
-		Scope, /// scope,
-		Shared, // shared,
-		Static, /// static,
-		Synchronized, /// synchronized,
+		Gshared, /// __gshared
+		Immutable, // immutable
+		Inout, // inout
+		Scope, /// scope
+		Shared, // shared
+		Static, /// static
+		Synchronized, /// synchronized
 		ATTRIBUTES_END, ///
-	Alias, /// alias,
-	Asm, /// asm,
-	Assert, /// assert,
-	Body, /// body,
-	Break, /// break,
-	Case, /// case,
-	Cast, /// cast,
-	Catch, /// catch,
-	Class, /// class,
-	Continue, /// continue,
-	Debug, /// debug,
-	Default, /// default,
-	Delegate, /// delegate,
-	Delete, /// delete,
-	Do, /// do,
-	Else, /// else,
-	Enum, /// enum,
-	False, /// false,
-	Finally, /// finally,
-	Foreach, /// foreach,
-	Foreach_reverse, /// foreach_reverse,
-	For, /// for,
-	Goto, /// goto,
-	If, /// if ,
-	Import, /// import,
-	In, /// in,
-	Interface, /// interface,
-	Invariant, /// invariant,
-	Is, /// is,
-	Lazy, /// lazy,
-	Macro, /// macro,
-	Mixin, /// mixin,
-	Module, /// module,
-	New, /// new,
-	Nothrow, /// nothrow,
-	Null, /// null,
-	Out, /// out,
-	Override, /// override,
-	Pure, /// pure,
-	Ref, /// ref,
-	Return, /// return,
-	Struct, /// struct,
-	Super, /// super,
-	Switch, /// switch ,
-	This, /// this,
-	Throw, /// throw,
-	True, /// true,
-	Try, /// try,
-	Typedef, /// typedef,
-	Typeid, /// typeid,
-	Typeof, /// typeof,
-	Union, /// union,
-	Unittest, /// unittest,
-	Version, /// version,
-	Volatile, /// volatile,
-	While, /// while ,
-	With, /// with,
+	Alias, /// alias
+	Asm, /// asm
+	Assert, /// assert
+	Body, /// body
+	Break, /// break
+	Case, /// case
+	Cast, /// cast
+	Catch, /// catch
+	Class, /// class
+	Continue, /// continue
+	Debug, /// debug
+	Default, /// default
+	Delegate, /// delegate
+	Delete, /// delete
+	Do, /// do
+	Else, /// else
+	Enum, /// enum
+	False, /// false
+	Finally, /// finally
+	Foreach, /// foreach
+	Foreach_reverse, /// foreach_reverse
+	For, /// for
+	Goto, /// goto
+	If, /// if
+	Import, /// import
+	In, /// in
+	Interface, /// interface
+	Invariant, /// invariant
+	Is, /// is
+	Lazy, /// lazy
+	Macro, /// macro
+	Mixin, /// mixin
+	Module, /// module
+	New, /// new
+	Nothrow, /// nothrow
+	Null, /// null
+	Out, /// out
+	Override, /// override
+	Pure, /// pure
+	Ref, /// ref
+	Return, /// return
+	Struct, /// struct
+	Super, /// super
+	Switch, /// switch
+	This, /// this
+	Throw, /// throw
+	True, /// true
+	Try, /// try
+	Typedef, /// typedef
+	Typeid, /// typeid
+	Typeof, /// typeof
+	Union, /// union
+	Unittest, /// unittest
+	Version, /// version
+	Volatile, /// volatile
+	While, /// while
+	With, /// with
 	KEYWORDS_END, ///

 	// Constants
-	CONSTANTS_BEGIN,
-	File, /// __FILE__,
-	Line, /// __LINE__,
-	Thread, /// __thread,
-	Traits, /// __traits,
+	CONSTANTS_BEGIN, ///
+	File, /// __FILE__
+	Line, /// __LINE__
+	Thread, /// __thread
+	Traits, /// __traits
 	CONSTANTS_END, ///

 	// Misc
@ -625,6 +667,7 @@ enum TokenType: uint
 	Identifier, /// anything else
 	ScriptLine, // Line at the beginning of source file that starts from #!
 	Whitespace, /// whitespace
+    SpecialTokenSequence, /// #line 10 "file.d"
 	MISC_END, ///

 	// Literals
@ -1429,11 +1472,11 @@ body
 	int depth = 1;
 	while (!r.empty)
 	{
-		if (r.front == TokenType.LBrace)
+		if (r.front.type == TokenType.LBrace)
 		{
 			++depth;
 		}
-		else if (r.front == TokenType.RBrace)
+		else if (r.front.type == TokenType.RBrace)
 		{
 			--depth;
 			if (depth <= 0)
@ -1479,7 +1522,7 @@ unittest
 {
 	uint i;
 	uint l;
-	auto a = "q{import std.stdio;}";
+	auto a = "q{import std.stdio;} abcd";
 	auto ar = lexTokenString(a, i, l);
 	assert (ar == TokenType.StringLiteral);
 	assert (ar == "import std.stdio;");
@ -2071,6 +2114,109 @@ unittest
 	assert (pr == TokenType.DoubleLiteral);
 }

+/**
+ * Attempts to lex a
+ * $(LINK2 http://dlang.org/lex.html#specialtokens, special token sequence)
+ * of the form: #line IntegerLiteral ["Filespec"] EndOfLine
+ *
+ * All lexing is done on a saved copy of the input and on local copies of the
+ * counters, so the caller's state is only modified when the whole sequence
+ * matches.
+ *
+ * Params:
+ *     input = range whose front is '#'; advanced past the sequence on
+ *         success, left untouched on failure
+ *     index = position counter; advanced on success only
+ *     lineNumber = line counter; on success updated from the integer given
+ *         in the directive
+ * Returns: the text of the sequence, or null if the input starting at '#'
+ *     is not a well-formed #line sequence
+ */
+string lexSpecialTokenSequence(R)(ref R input, ref uint index,
+    ref uint lineNumber)
+in
+{
+    assert (input.front == '#');
+}
+body
+{
+    // Work on copies so that a failed match leaves the caller's state alone.
+    auto i = index;
+    auto r = input.save;
+    auto l = lineNumber;
+    r.popFront();
+    ++i;
+    auto app = appender!(ElementType!(R)[])();
+    app.put('#');
+
+    auto specialType = appender!(ElementType!(R)[])();
+
+    // Collect the word that directly follows '#'.
+    while (!r.empty && !isSeparating(r.front))
+    {
+        specialType.put(r.front);
+        ++i;
+        r.popFront();
+    }
+
+    if (to!string(specialType.data) != "line")
+        return null;
+    app.put(specialType.data);
+
+    // The input may end right after "#line" (or after the whitespace that
+    // follows it), so r.front must not be read before checking r.empty.
+    if (!r.empty && std.uni.isWhite(r.front))
+        app.put(lexWhitespace(r, i, l).value);
+
+    if (r.empty || !isDigit(r.front))
+        return null;
+
+    auto t = lexNumber(r, i, l);
+    if (t != TokenType.IntLiteral)
+        return null;
+
+    app.put(t.value);
+    l = to!uint(t.value);
+
+    if (!isNewline(r))
+    {
+        if (!r.empty && std.uni.isWhite(r.front))
+            app.put(lexWhitespace(r, i, l).value);
+
+        if (!r.empty && r.front == '"')
+        {
+            auto fSpecApp = appender!(ElementType!(R)[])();
+            fSpecApp.put(r.front);
+            r.popFront();
+            ++i;
+            bool terminated = false;
+            while (!r.empty)
+            {
+                if (r.front == '"')
+                {
+                    fSpecApp.put('"');
+                    ++i;
+                    r.popFront();
+                    terminated = true;
+                    break;
+                }
+                ++i;
+                fSpecApp.put(r.front);
+                r.popFront();
+            }
+            // A filespec that runs to the end of the input without a closing
+            // quote is not a valid special token sequence.
+            if (!terminated)
+                return null;
+            app.put(fSpecApp.data);
+        }
+        else
+            return null;
+    }
+
+    // Commit: consume the matched text from the caller's range and publish
+    // the updated counters.
+    app.put(popNewline(r, i));
+    input.popFrontN(i - index);
+    index = i;
+    lineNumber = l;
+    return to!string(app.data);
+}
+
+unittest
+{
+    // The position and line counters are shared by all three cases, so the
+    // line number set by one directive is still visible in the next case.
+    uint pos;
+    uint line;
+
+    // "#line N" followed by a newline is consumed whole.
+    auto bare = "#line 10\n";
+    auto bareText = lexSpecialTokenSequence(bare, pos, line);
+    assert (bareText == "#line 10\n");
+    assert (bare == "");
+    assert (line == 10);
+
+    // The optional quoted filespec is part of the sequence.
+    auto withFile = "#line 9201 \"test.d\"\n";
+    auto withFileText = lexSpecialTokenSequence(withFile, pos, line);
+    assert (line == 9201);
+    assert (withFileText == "#line 9201 \"test.d\"\n");
+    assert (withFile == "");
+
+    // Anything other than "line" after '#' is rejected, and neither the
+    // input nor the line counter is modified.
+    auto notLine = `#lin`;
+    auto notLineText = lexSpecialTokenSequence(notLine, pos, line);
+    assert (line == 9201);
+    assert (notLineText is null);
+    assert (notLine == `#lin`);
+}
+
 pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
 {
 	switch (ch)
@ -2364,3 +2510,5 @@ string generateCaseTrie(string[] args ...)
 	}
 	return printCaseStatements(t, "");
 }
+
+// Empty program entry point.
+// NOTE(review): presumably added so this module can be built and unit-tested
+// on its own; an entry point in a library module may clash with programs
+// that define their own main() -- confirm this is intentional.
+void main()
+{
+}