From 070f9ac83b448911c192cd0be2b6e31d313c6664 Mon Sep 17 00:00:00 2001
From: Hackerpilot
Date: Sun, 12 Jan 2014 02:45:37 +0000
Subject: [PATCH] Finished conversion to new lexer

---
 ctags.d         |   7 +-
 datapicked      |   2 +-
 main.d          |   2 +-
 stdx/d/ast.d    |   2 +-
 stdx/d/lexer.d  | 195 +++++++++++++++++++++++++++++-------------------
 stdx/d/parser.d |  53 +++++++++++--
 6 files changed, 172 insertions(+), 89 deletions(-)

diff --git a/ctags.d b/ctags.d
index db633df..7000833 100644
--- a/ctags.d
+++ b/ctags.d
@@ -24,7 +24,7 @@ void printCtags(File output, string[] fileNames)
 		File f = File(fileName);
 		auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
 		f.rawRead(bytes);
-		auto tokens = DLexer!(typeof(bytes))(bytes);
+		auto tokens = byToken(bytes);
 		Module m = parseModule(tokens.array, fileName, &doNothing);
 		auto printer = new CTagsPrinter;
 		printer.fileName = fileName;
@@ -40,9 +40,6 @@ void printCtags(File output, string[] fileNames)
 
 class CTagsPrinter : ASTVisitor
 {
-
-	alias ASTVisitor.visit visit;
-
 	override void visit(ClassDeclaration dec)
 	{
 		tagLines ~= "%s\t%s\t%d;\"\tc%s\n".format(dec.name.text, fileName, dec.name.line, context);
@@ -134,6 +131,8 @@ class CTagsPrinter : ASTVisitor
 		}
 		dec.accept(this);
 	}
+
+	alias ASTVisitor.visit visit;
 
 	string fileName;
 	string[] tagLines;
diff --git a/datapicked b/datapicked
index eb14a52..f63a843 160000
--- a/datapicked
+++ b/datapicked
@@ -1 +1 @@
-Subproject commit eb14a5244153c0e13ceca79f292838dfe2ac9bfb
+Subproject commit f63a843e9c0ce8db7fd897684fe323697255d87d
diff --git a/main.d b/main.d
index 6ec2759..5ddf6a0 100644
--- a/main.d
+++ b/main.d
@@ -234,7 +234,7 @@ options:
         Prints the number of logical lines of code in the given
         source files. If no files are specified, input is read from stdin.
 
-    --tokenCount | t [sourceFiles]
+    --tokenCount | -t [sourceFiles]
         Prints the number of tokens in the given source files. If
        no files are specified, input is read from stdin.
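The functional core of the conversion is visible in the ctags.d hunk above: token streams now come from the free function byToken rather than from constructing DLexer!(typeof(bytes)) directly. What follows is a minimal usage sketch, not part of the patch itself; it assumes the stdx.d.* module layout this repository uses, lexAndParse and ignoreMessage are illustrative names only, and the message-callback signature is taken from the doNothingErrorFunction declared later in this patch.

import std.array : array;
import std.file : read;

import stdx.d.ast : Module;
import stdx.d.lexer : byToken;
import stdx.d.parser : parseModule;

// Hypothetical helper mirroring the updated printCtags flow in ctags.d.
Module lexAndParse(string fileName)
{
	auto bytes = cast(ubyte[]) read(fileName);   // raw source bytes
	auto tokens = byToken(bytes);                // new entry point, replaces DLexer!(typeof(bytes))(bytes)
	return parseModule(tokens.array(), fileName, &ignoreMessage);
}

// No-op message callback, analogous to the &doNothing argument in ctags.d.
void ignoreMessage(string fileName, size_t line, size_t column, string message)
{
}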
diff --git a/stdx/d/ast.d b/stdx/d/ast.d index ba948d0..adf4ccc 100644 --- a/stdx/d/ast.d +++ b/stdx/d/ast.d @@ -943,7 +943,7 @@ public: destructor, staticConstructor, staticDestructor, sharedStaticDestructor, sharedStaticConstructor, conditionalDeclaration, pragmaDeclaration, versionSpecification, - declarations)); + invariant_, postblit, declarations)); } /** */ Attribute[] attributes; diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index 735054a..08c6e0c 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -18,7 +18,7 @@ private enum staticTokens = [ private enum pseudoTokens = [ "\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", - "\u2028", "\u2029" + "#line", "\u2028", "\u2029" ]; private enum possibleDefaultTokens = [ @@ -343,15 +343,15 @@ public struct DLexer(R) ".", "lexDot", "'", "lexCharacterLiteral", "0", "lexNumber", - "1", "lexNumber", - "2", "lexNumber", - "3", "lexNumber", - "4", "lexNumber", - "5", "lexNumber", - "6", "lexNumber", - "7", "lexNumber", - "8", "lexNumber", - "9", "lexNumber", + "1", "lexDecimal", + "2", "lexDecimal", + "3", "lexDecimal", + "4", "lexDecimal", + "5", "lexDecimal", + "6", "lexDecimal", + "7", "lexDecimal", + "8", "lexDecimal", + "9", "lexDecimal", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", @@ -362,7 +362,8 @@ public struct DLexer(R) "\n", "lexWhitespace", "\u2028", "lexLongNewline", "\u2029", "lexLongNewline", - "#!", "lexScriptLine" + "#!", "lexScriptLine", + "#line", "lexSpecialTokenSequence" ]; mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens, @@ -437,7 +438,7 @@ public struct DLexer(R) Token lexWhitespace() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); loop: do { switch (range.front) @@ -475,13 +476,13 @@ public struct DLexer(R) break loop; } } while (!range.empty); - return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"whitespace", cast(string) range.slice(mark), line, + column, index); } Token lexNumber() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); auto lookahead = range.lookahead(2); if (range.front == '0' && lookahead.length == 2) { @@ -491,27 +492,27 @@ public struct DLexer(R) case 'X': range.popFront(); range.popFront(); - return lexHex(mark); + return lexHex(mark, line, column, index); case 'b': case 'B': range.popFront(); range.popFront(); - return lexBinary(mark); + return lexBinary(mark, line, column, index); default: - return lexDecimal(mark); + return lexDecimal(mark, line, column, index); } } else - return lexDecimal(mark); + return lexDecimal(mark, line, column, index); } Token lexHex() pure nothrow { - auto mark = range.mark(); - return lexHex(mark); + mixin (tokenStart); + return lexHex(mark, line, column, index); } - Token lexHex(Mark mark) pure nothrow + Token lexHex(Mark mark, size_t line, size_t column, size_t index) pure nothrow { IdType type = tok!"intLiteral"; bool foundDot; @@ -556,17 +557,17 @@ public struct DLexer(R) break hexLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexBinary() pure nothrow { - auto mark = range.mark(); - return lexBinary(mark); + mixin (tokenStart); + return lexBinary(mark, line, column, index); } - Token lexBinary(Mark mark) pure nothrow + Token lexBinary(Mark mark, size_t line, size_t column, size_t index) 
pure nothrow { IdType type = tok!"intLiteral"; binaryLoop: while (!range.empty) @@ -587,11 +588,17 @@ public struct DLexer(R) break binaryLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } - Token lexDecimal(Mark mark) pure nothrow + Token lexDecimal() + { + mixin (tokenStart); + return lexDecimal(mark, line, column, index); + } + + Token lexDecimal(Mark mark, size_t line, size_t column, size_t index) pure nothrow { bool foundDot = range.front == '.'; IdType type = tok!"intLiteral"; @@ -665,8 +672,8 @@ public struct DLexer(R) break decimalLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } void lexIntSuffix(ref IdType type) pure nothrow @safe @@ -768,15 +775,27 @@ public struct DLexer(R) } } - - Token lexSpecialTokenSequence() pure nothrow @safe + Token lexScriptLine() pure { - assert (false, "Not implemented"); + mixin (tokenStart); + while (!range.empty && !isNewline) + range.popFront(); + return Token(tok!"scriptLine", cast(string) range.slice(mark), + line, column, index); + } + + Token lexSpecialTokenSequence() pure + { + mixin (tokenStart); + while (!range.empty && !isNewline) + range.popFront(); + return Token(tok!"specialTokenSequence", cast(string) range.slice(mark), + line, column, index); } Token lexSlashStarComment() pure { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -794,13 +813,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexSlashSlashComment() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -810,13 +829,13 @@ public struct DLexer(R) break; range.popFront(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexSlashPlusComment() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -844,13 +863,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexStringLiteral() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); while (true) { @@ -873,13 +892,13 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexWysiwygString() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"stringLiteral"; bool backtick = range.front == '`'; if (backtick) @@ -927,8 +946,8 @@ public struct DLexer(R) } } lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } void 
lexStringSuffix(ref IdType type) pure @@ -950,7 +969,7 @@ public struct DLexer(R) Token lexDelimitedString() pure nothrow { import std.traits; - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); Unqual!(ElementEncodingType!R) open; @@ -961,29 +980,30 @@ public struct DLexer(R) open = '<'; close = '>'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '{': open = '{'; close = '}'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '[': open = '['; close = ']'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '(': open = '('; close = ')'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); default: return lexHeredocString(); } } - Token lexNormalDelimitedString(Mark mark, ElementEncodingType!R open, - ElementEncodingType!R close) pure nothrow + Token lexNormalDelimitedString(Mark mark, size_t line, size_t column, + size_t index, ElementEncodingType!R open, ElementEncodingType!R close) + pure nothrow { int depth = 1; while (!range.empty && depth > 0) @@ -1013,7 +1033,7 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); + return Token(type, cast(string) range.slice(mark), line, column, index); } Token lexHeredocString() pure nothrow @@ -1023,6 +1043,7 @@ public struct DLexer(R) Token lexTokenString() pure { + mixin (tokenStart); assert(range.front == 'q'); range.popFront(); assert(range.front == '{'); @@ -1055,12 +1076,12 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, app.data, range.line, range.column, range.index); + return Token(type, app.data, line, column, index); } Token lexHexString() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); @@ -1091,8 +1112,8 @@ public struct DLexer(R) IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } bool lexEscapeSequence() pure nothrow @@ -1190,7 +1211,7 @@ public struct DLexer(R) Token lexCharacterLiteral() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); if (range.front == '\\') { @@ -1201,7 +1222,7 @@ public struct DLexer(R) { range.popFront(); return Token(tok!"characterLiteral", cast(string) range.slice(mark), - range.line, range.column, range.index); + line, column, index); } else if (range.front & 0x80) { @@ -1219,7 +1240,7 @@ public struct DLexer(R) { range.popFront(); return Token(tok!"characterLiteral", cast(string) range.slice(mark), - range.line, range.column, range.index); + line, column, index); } else { @@ -1230,22 +1251,23 @@ public struct DLexer(R) Token lexIdentifier() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); while (!range.empty && !isSeparating(range.front)) { range.popFront(); } - return Token(tok!"identifier", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"identifier", cast(string) 
range.slice(mark), line, + column, index); } Token lexDot() pure nothrow { + mixin (tokenStart); auto lookahead = range.lookahead(1); if (lookahead.length == 0) { range.popFront(); - return Token(tok!".", null, range.line, range.column, range.index); + return Token(tok!".", null, line, column, index); } switch (lookahead[0]) { @@ -1257,30 +1279,36 @@ public struct DLexer(R) if (!range.empty && range.front == '.') { range.popFront(); - return Token(tok!"...", null, range.line, range.column, range.index); + return Token(tok!"...", null, line, column, index); } else - return Token(tok!"..", null, range.line, range.column, range.index); + return Token(tok!"..", null, line, column, index); default: range.popFront(); - return Token(tok!".", null, range.line, range.column, range.index); + return Token(tok!".", null, line, column, index); } } Token lexLongNewline() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); range.popFront(); range.incrementLine(); - return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"whitespace", cast(string) range.slice(mark), line, + column, index); } - Token lexScriptLine() pure nothrow + bool isNewline() pure @safe { - assert(false, "Not implemented"); + if (range.front == '\n') return true; + if (range.front == '\r') return true; + auto lookahead = range.lookahead(3); + if (lookahead.length == 0) return false; + if (lookahead.startsWith("\u2028") || lookahead.startsWith("\u2029")) + return true; + return false; } bool isSeparating(ElementType!R c) nothrow pure @safe @@ -1290,10 +1318,23 @@ public struct DLexer(R) if (c >= '[' && c <= '^') return true; if (c >= '{' && c <= '~') return true; if (c == '`') return true; +// if (c & 0x80 && (range.lookahead(3).startsWith("\u2028") +// || range.lookahead(3).startsWith("\u2029"))) return true; return false; } + enum tokenStart = q{ + size_t index = range.index; + size_t column = range.column; + size_t line = range.line; + auto mark = range.mark(); + }; + void error(...) pure { } + + void warning(...) pure { + + } } diff --git a/stdx/d/parser.d b/stdx/d/parser.d index 38ded73..fbdc62d 100644 --- a/stdx/d/parser.d +++ b/stdx/d/parser.d @@ -96,7 +96,6 @@ class Parser unittest { - stderr.writeln("Running unittest for parseAliasDeclaration."); auto sourceCode = q{ alias core.sys.posix.stdio.fileno fileno; @@ -128,6 +127,16 @@ alias core.sys.posix.stdio.fileno fileno; node.type = parseType(); return node; } + + unittest + { + auto sourceCode = q{a = abcde!def}; + Parser p = getParserForUnittest(sourceCode, "parseAliasInitializer"); + auto initializer = p.parseAliasInitializer(); + assert (initializer !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAliasInitializer() passed."); + } /** * Parses an AliasThisDeclaration @@ -147,6 +156,16 @@ alias core.sys.posix.stdio.fileno fileno; if (expect(tok!";") is null) return null; return node; } + + unittest + { + auto sourceCode = q{alias oneTwoThree this;}; + Parser p = getParserForUnittest(sourceCode, "parseAliasThisDeclaration"); + auto aliasThis = p.parseAliasThisDeclaration(); + assert (aliasThis !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAliasThisDeclaration() passed."); + } /** * Parses an AlignAttribute. 
@@ -169,6 +188,18 @@ alias core.sys.posix.stdio.fileno fileno; } return node; } + + unittest + { + auto sourceCode = q{align(42) align}; + Parser p = getParserForUnittest(sourceCode, "parseAlignAttribute"); + auto attribute = p.parseAlignAttribute(); + assert (attribute !is null); + attribute = p.parseAlignAttribute(); + assert (attribute !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAlignAttribute() passed."); + } /** * Parses an AndAndExpression @@ -3098,6 +3129,16 @@ invariant() foo(); if (expect(tok!")") is null) return null; return node; } + + unittest + { + auto sourceCode = q{is ( x : uybte)}c; + Parser p = getParserForUnittest(sourceCode, "parseIsExpression"); + auto isExp1 = p.parseIsExpression(); + assert (isExp1 !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseIsExpression passed."); + } /** * Parses a KeyValuePair @@ -3369,6 +3410,10 @@ invariant() foo(); node.symbol = parseSymbol(); return node; } + + unittest + { + } /** * Parses a Module @@ -5951,8 +5996,6 @@ protected: return hasMagicDelimiter!(tok!":")(); } - - bool hasMagicDelimiter(alias T)() { mixin(traceEnterAndExit!(__FUNCTION__)); @@ -6432,14 +6475,14 @@ protected: } version (unittest) static void doNothingErrorFunction(string fileName, - int line, int column, string message) {} + size_t line, size_t column, string message) {} version (unittest) static Parser getParserForUnittest(string sourceCode, string testName) { auto r = byToken(cast(ubyte[]) sourceCode); Parser p = new Parser; - //p.messageFunction = &doNothingErrorFunction; + p.messageFunction = &doNothingErrorFunction; p.fileName = testName ~ ".d"; p.tokens = r.array(); return p;
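The recurring mechanical change in the stdx/d/lexer.d portion of the patch is that every lex* routine now begins with mixin (tokenStart);, snapshotting index, column, line, and a mark before any characters are consumed, and then builds the Token from that snapshot instead of from range.line/range.column/range.index after the fact, which would report the token's end position. Below is a small self-contained sketch of that pattern; FakeRange is a simplified stand-in for illustration, not the real range type from stdx.lexer.

import std.stdio : writeln;

// Simplified stand-in for the lexer's input range; only the members the
// tokenStart mixin touches are modeled here.
struct FakeRange
{
	string source;
	size_t index;
	size_t column = 1;
	size_t line = 1;

	bool empty() const { return index >= source.length; }
	char front() const { return source[index]; }
	void popFront() { index++; column++; }
	size_t mark() const { return index; }
	string slice(size_t mark) const { return source[mark .. index]; }
}

// Same idea as the patch: a string mixin that records where a token starts.
enum tokenStart = q{
	size_t index = range.index;
	size_t column = range.column;
	size_t line = range.line;
	auto mark = range.mark();
};

void main()
{
	auto range = FakeRange("abc  ");
	mixin (tokenStart);                 // snapshot taken before consuming input
	while (!range.empty && range.front != ' ')
		range.popFront();
	// The reported position is the token's start, not the lexer's current spot.
	writeln(range.slice(mark), " at line ", line, ", column ", column);
}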