Tokenizer is somewhat functional

Hackerpilot 2013-01-18 00:34:59 -08:00
parent e3c737f6e1
commit a7f81c57cc
3 changed files with 229 additions and 282 deletions

View File

@ -53,15 +53,13 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
caseStatement ~= k;
caseStatement ~= "';\n";
caseStatement ~= indentString;
caseStatement ~= "\tcurrent.lineNumber = lineNumber;\n";
caseStatement ~= indentString;
caseStatement ~= "\t++index;\n";
caseStatement ~= indentString;
caseStatement ~= "\tinput.popFront();\n";
caseStatement ~= "\trange.popFront();\n";
if (v.children.length > 0)
{
caseStatement ~= indentString;
caseStatement ~= "\tif (isEoF(inputString, endIndex))\n";
caseStatement ~= "\tif (range.isEoF())\n";
caseStatement ~= indentString;
caseStatement ~= "\t{\n";
caseStatement ~= indentString;
@ -72,7 +70,7 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
caseStatement ~= indentString;
caseStatement ~= "\t}\n";
caseStatement ~= indentString;
caseStatement ~= "\tswitch (input.front)\n";
caseStatement ~= "\tswitch (range.front)\n";
caseStatement ~= indentString;
caseStatement ~= "\t{\n";
caseStatement ~= printCaseStatements(v, indentString ~ "\t");

View File

@ -110,7 +110,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
case 5:
switch (input)
{
case "@safe": return TokenType.AtSafe;
case "alias": return TokenType.Alias;
case "align": return TokenType.Align;
case "break": return TokenType.Break;
@ -169,7 +168,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
case 7:
switch (input)
{
case "@system": return TokenType.AtSystem;
case "cdouble": return TokenType.Cdouble;
case "default": return TokenType.Default;
case "dstring": return TokenType.DString;
@ -196,9 +194,7 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
case "__thread": return TokenType.Thread;
case "__traits": return TokenType.Traits;
case "volatile": return TokenType.Volatile;
case "@trusted": return TokenType.AtTrusted;
case "delegate": return TokenType.Delegate;
case "@disable": return TokenType.AtDisable;
case "function": return TokenType.Function;
case "unittest": return TokenType.Unittest;
case "__FILE__": return TokenType.File;
@ -209,7 +205,6 @@ pure nothrow TokenType lookupTokenTypeOptimized(const string input)
switch (input)
{
case "__gshared": return TokenType.Gshared;
case "@property": return TokenType.AtProperty;
case "immutable": return TokenType.Immutable;
case "interface": return TokenType.Interface;
case "invariant": return TokenType.Invariant;
@ -243,6 +238,7 @@ enum TokenType: uint
// Operators
OPERATORS_BEGIN,
Assign, /// =
At, /// @
BitAnd, /// &
BitAndEquals, /// &=
BitOr, /// |
@ -433,14 +429,6 @@ enum TokenType: uint
Traits, /// __traits,
CONSTANTS_END,
// Properties
PROPERTIES_BEGIN,
AtProperty, /// @property
AtSafe, /// @safe
AtSystem, /// @system
AtTrusted, /// @trusted
PROPERTIES_END,
// Misc
MISC_BEGIN,
Blank, /// unknown token type
@ -505,7 +493,6 @@ static this()
"delegate" : TokenType.Delegate,
"delete" : TokenType.Delete,
"deprecated" : TokenType.Deprecated,
"@disable" : TokenType.AtDisable,
"do" : TokenType.Do,
"double" : TokenType.Double,
"dstring" : TokenType.DString,
@ -550,14 +537,12 @@ static this()
"package" : TokenType.Package,
"pragma" : TokenType.Pragma,
"private" : TokenType.Private,
"@property" : TokenType.AtProperty,
"protected" : TokenType.Protected,
"public" : TokenType.Public,
"pure" : TokenType.Pure,
"real" : TokenType.Real,
"ref" : TokenType.Ref,
"return" : TokenType.Return,
"@safe" : TokenType.AtSafe,
"scope" : TokenType.Scope,
"shared" : TokenType.Shared,
"short" : TokenType.Short,
@ -567,14 +552,12 @@ static this()
"super" : TokenType.Super,
"switch" : TokenType.Switch,
"synchronized" : TokenType.Synchronized,
"@system" : TokenType.AtSystem,
"template" : TokenType.Template,
"this" : TokenType.This,
"__thread" : TokenType.Thread,
"throw" : TokenType.Throw,
"__traits" : TokenType.Traits,
"true" : TokenType.True,
"@trusted" : TokenType.AtTrusted,
"try" : TokenType.Try,
"typedef" : TokenType.Typedef,
"typeid" : TokenType.Typeid,

View File

@ -14,6 +14,7 @@ import std.uni;
import std.stdio;
import std.ascii;
import std.format;
import std.exception;
import langutils;
import codegen;
@ -29,9 +30,9 @@ pure bool isEoF(R)(R range)
return range.empty || range.front == 0 || range.front == 0x1a;
}
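// Not part of this commit -- a quick illustrative sketch of what isEoF treats
// as end of input: an exhausted range, a NUL character, or 0x1A (the legacy
// Ctrl+Z / SUB end-of-file marker).
unittest
{
    assert ("".isEoF());
    assert ("\0".isEoF());
    assert ("\x1a".isEoF());
    assert (!"module".isEoF());
}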
char[] popNewline(R)(ref R range, ref uint index)
C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index) if (isSomeChar!C && isForwardRange!R)
{
char[] chars;
C[] chars;
if (range.front == '\r')
{
chars ~= range.front;
@ -58,13 +59,14 @@ unittest
/**
* Returns: a whitespace token covering the run of whitespace consumed from the range
*/
Token lexWhitespace(R)(ref R range, ref uint index, ref uint lineNumber)
Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index, ref uint lineNumber)
if (isForwardRange!R && isSomeChar!C)
{
Token t;
t.type = TokenType.Whitespace;
t.lineNumber = lineNumber;
t.startIndex = index;
auto app = appender!(char[])();
auto app = appender!(C[])();
while (!isEoF(range) && std.uni.isWhite(range.front))
{
if (isNewline(range))
@ -104,7 +106,8 @@ unittest
* lineNumber = the line number that corresponds to endIndex
* Returns: The comment
*/
Token lexComment(R)(ref R input, ref uint index, ref uint lineNumber)
Token lexComment(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber)
if (isSomeChar!C && isForwardRange!R)
in
{
assert (input.front == '/');
@ -115,7 +118,7 @@ body
t.lineNumber = lineNumber;
t.type = TokenType.Comment;
t.startIndex = index;
auto app = appender!(char[])();
auto app = appender!(C[])();
app.put(input.front);
input.popFront();
switch(input.front)
@ -252,10 +255,10 @@ unittest
/**
* Pops up to upTo digit characters (as classified by isInterestingDigit) from the input range and returns them as a string
*/
string popDigitChars(R, alias isInterestingDigit)(ref R input, ref uint index,
uint upTo)
string popDigitChars(R, C = ElementType!R, alias isInterestingDigit)(ref R input, ref uint index,
uint upTo) if (isSomeChar!C && isForwardRange!R)
{
auto app = appender!(char[])();
auto app = appender!(C[])();
for (uint i = 0; i != upTo; ++i)
{
if (isInterestingDigit(input.front))
@ -271,12 +274,12 @@ string popDigitChars(R, alias isInterestingDigit)(ref R input, ref uint index,
string popHexChars(R)(ref R input, ref uint index, uint upTo)
{
return popDigitChars!(R, isHexDigit)(input, index, upTo);
return popDigitChars!(R, ElementType!R, isHexDigit)(input, index, upTo);
}
string popOctalChars(R)(ref R input, ref uint index, uint upTo)
{
return popDigitChars!(R, isOctalDigit)(input, index, upTo);
return popDigitChars!(R, ElementType!R, isOctalDigit)(input, index, upTo);
}
unittest
@ -297,7 +300,8 @@ unittest
assert (rc == "00123");
}
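// A hedged usage sketch, not part of the diff: the third argument caps how many
// digit characters are consumed from the front of the range; index is advanced
// by the same amount.
unittest
{
    uint i = 0;
    auto hex = "FF7F0012";
    assert (popHexChars(hex, i, 4) == "FF7F");
    auto oct = "0755777";
    assert (popOctalChars(oct, i, 4) == "0755");
}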
string interpretEscapeSequence(R)(ref R input, ref uint index)
string interpretEscapeSequence(R, C = ElementType!R)(ref R input, ref uint index)
if (isSomeChar!C && isForwardRange!R)
in
{
assert(input.front == '\\');
@ -391,17 +395,8 @@ unittest
assert (interpretEscapeSequence(k, i) == v);
}
/**
* Params:
* inputString = the source code to examine
* endIndex = an index into inputString at the opening quote
* lineNumber = the line number that corresponds to endIndex
* quote = the opening (and closing) quote character for the string to be
* lexed
* Returns: a string literal, including its opening and closing quote characters
*/
Token lexString(R)(ref R input, ref uint lineNumber, ref uint index,
bool canEscape = true)
Token lexString(R)(ref R input, ref uint index, ref uint lineNumber,
const StringStyle style = StringStyle.Escaped)
in
{
assert (input.front == '\'' || input.front == '"' || input.front == '`');
@ -411,10 +406,13 @@ body
Token t;
t.lineNumber = lineNumber;
t.startIndex = index;
t.type = TokenType.StringLiteral;
auto quote = input.front;
input.popFront();
++index;
auto app = appender!(char[])();
if (style & StringStyle.IncludeQuotes)
app.put(quote);
while (!isEoF(input))
{
if (isNewline(input))
@ -422,10 +420,12 @@ body
app.put(popNewline(input, index));
lineNumber++;
}
else if (input.front == '\\' && canEscape)
else if (input.front == '\\' && style & StringStyle.Escaped)
app.put(interpretEscapeSequence(input, index));
else if (input.front == quote)
{
if (style & StringStyle.IncludeQuotes)
app.put(quote);
input.popFront();
++index;
break;
@ -443,20 +443,17 @@ body
{
case 'w':
t.type = TokenType.WStringLiteral;
input.popFront();
++index;
break;
goto case 'c';
case 'd':
t.type = TokenType.DStringLiteral;
goto case 'c';
case 'c':
if (style & StringStyle.IncludeQuotes)
app.put(input.front);
input.popFront();
++index;
break;
case 'c':
input.popFront();
++index;
goto default;
default:
t.type = TokenType.StringLiteral;
break;
}
}
@ -473,7 +470,7 @@ unittest
auto b = "\"ab\\ncd\"";
assert (lexString(b, i, l) == "ab\ncd");
auto c = "`abc\\ndef`";
assert (lexString(c, i, l, false) == "abc\\ndef");
assert (lexString(c, i, l, StringStyle.NotEscaped) == "abc\\ndef");
auto d = `"12345"w`;
assert (lexString(d, i, l).type == TokenType.WStringLiteral);
auto e = `"abc"c`;
@ -1091,32 +1088,214 @@ pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
enum IterationStyle
{
/// Only include code, not whitespace or comments
CODE_ONLY,
CodeOnly = 0,
/// Includes comments
IncludeComments = 1,
/// Includes whitespace
IncludeWhitespace = 2 << 1,
/// Include everything
EVERYTHING
Everything = IncludeComments | IncludeWhitespace
}
struct TokenRange(R) if (isInputRange(R))
/**
* Configuration of the token lexing style
*/
enum StringStyle : uint
{
NotEscaped = 0,
/// String escape sequences will be processed and enclosing quote characters
/// will not be preserved.
Escaped = 1,
/// Strings will be read exactly as they appeared in the source, including
/// their opening and closing quote characters. Useful for syntax highlighting.
IncludeQuotes = 2,
}
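// A hedged sketch, not part of the diff, of how the flags above are meant to
// combine with lexString: IncludeQuotes keeps the delimiters (useful for
// highlighting) and leaving Escaped unset returns the source text verbatim.
unittest
{
    uint i;
    uint l;
    auto a = "\"abc\"";
    assert (lexString(a, i, l, StringStyle.IncludeQuotes) == "\"abc\"");
    auto b = "`x\\ny`";
    assert (lexString(b, i, l, StringStyle.NotEscaped) == "x\\ny");
}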
TokenRange!(R) byToken(R)(ref R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
const StringStyle tokenStyle = StringStyle.Escaped) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
{
auto r = TokenRange!(R)(range);
r.tokenStyle = tokenStyle;
r.iterStyle = iterationStyle;
r.lineNumber = 1;
r.popFront();
return r;
}
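// A hedged sketch of the intended entry point: byToken wraps a character range
// in a TokenRange, and the IterationStyle/StringStyle arguments choose whether
// whitespace and comments come back as tokens and how string literals are stored.
unittest
{
    auto source = "if (x >= 10) { /* note */ }";
    // Default configuration: code tokens only, escape sequences processed.
    foreach (t; byToken(source))
        writeln(t.type, " ", t.value);
    // Intended configuration for a syntax highlighter.
    foreach (t; byToken(source, IterationStyle.Everything, StringStyle.IncludeQuotes))
        writeln(t);
}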
struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
{
this(ref R range)
{
this.range = range;
}
bool empty() const @property
bool empty() @property
{
return _empty;
}
Token front() const @property
{
enforce(!_empty, "Cannot call front() on an empty token range");
return current;
}
Token popFront()
{
Token c = current;
if (range.isEoF())
{
_empty = true;
return current;
}
Token c = current;
current = Token.init;
current.lineNumber = lineNumber;
current.startIndex = index;
while (std.uni.isWhite(range.front))
{
if (iterStyle == IterationStyle.Everything)
{
current = lexWhitespace(range, index, lineNumber);
break;
}
else
lexWhitespace(range, index, lineNumber);
}
outer: switch (range.front)
{
mixin(generateCaseTrie(
"=", "TokenType.Assign",
"&", "TokenType.BitAnd",
"&=", "TokenType.BitAndEquals",
"|", "TokenType.BitOr",
"|=", "TokenType.BitOrEquals",
"~=", "TokenType.CatEquals",
":", "TokenType.Colon",
",", "TokenType.Comma",
"$", "TokenType.Dollar",
".", "TokenType.Dot",
"==", "TokenType.Equals",
"=>", "TokenType.GoesTo",
">", "TokenType.Greater",
">=", "TokenType.GreaterEqual",
"#", "TokenType.Hash",
"&&", "TokenType.LogicAnd",
"{", "TokenType.LBrace",
"[", "TokenType.LBracket",
"<", "TokenType.Less",
"<=", "TokenType.LessEqual",
"<>=", "TokenType.LessEqualGreater",
"<>", "TokenType.LessOrGreater",
"||", "TokenType.LogicOr",
"(", "TokenType.LParen",
"-", "TokenType.Minus",
"-=", "TokenType.MinusEquals",
"%", "TokenType.Mod",
"%=", "TokenType.ModEquals",
"*=", "TokenType.MulEquals",
"!", "TokenType.Not",
"!=", "TokenType.NotEquals",
"!>", "TokenType.NotGreater",
"!>=", "TokenType.NotGreaterEqual",
"!<", "TokenType.NotLess",
"!<=", "TokenType.NotLessEqual",
"!<>", "TokenType.NotLessEqualGreater",
"+", "TokenType.Plus",
"+=", "TokenType.PlusEquals",
"^^", "TokenType.Pow",
"^^=", "TokenType.PowEquals",
"}", "TokenType.RBrace",
"]", "TokenType.RBracket",
")", "TokenType.RParen",
";", "TokenType.Semicolon",
"<<", "TokenType.ShiftLeft",
"<<=", "TokenType.ShiftLeftEqual",
">>", "TokenType.ShiftRight",
">>=", "TokenType.ShiftRightEqual",
"..", "TokenType.Slice",
"*", "TokenType.Star",
"?", "TokenType.Ternary",
"~", "TokenType.Tilde",
"--", "TokenType.Decrement",
"!<>=", "TokenType.Unordered",
">>>", "TokenType.UnsignedShiftRight",
">>>=", "TokenType.UnsignedShiftRightEqual",
"++", "TokenType.Increment",
"...", "TokenType.Vararg",
"^", "TokenType.Xor",
"^=", "TokenType.XorEquals",
"@", "TokenType.At",
));
case '0': .. case '9':
current = lexNumber(range, index, lineNumber);
break;
case '\'':
case '"':
current = lexString(range, index, lineNumber);
break;
case '`':
current = lexString(range, index, lineNumber, StringStyle.NotEscaped);
break;
case 'q':
auto r = range.save;
r.popFront();
if (!r.isEoF() && r.front == '{')
writeln("ParseTokenString");
else
goto default;
case '/':
auto r = range.save();
r.popFront();
if (r.isEoF())
{
current.type = TokenType.Div;
current.value = "/";
break;
}
switch (r.front)
{
case '/':
case '*':
case '+':
current = lexComment(range, index, lineNumber);
break outer;
case '=':
current.type = TokenType.DivEquals;
current.value = "/=";
break outer;
default:
current.type = TokenType.Div;
current.value = "/";
break;
}
break;
case 'r':
auto r = range.save();
r.popFront();
if (!r.isEoF() && r.front == '"')
writeln("parse wysiwyg string");
else
goto default;
case 'x':
auto r = range.save();
r.popFront();
if (!r.isEoF() && r.front == '"')
writeln("parse hex string");
else
goto default;
default:
auto app = appender!(ElementType!(R)[])();
while(!range.isEoF() && !isSeparating(range.front))
{
app.put(range.front);
range.popFront();
}
current.value = to!string(app.data);
current.type = lookupTokenTypeOptimized(current.value);
break;
}
return c;
}
@ -1126,226 +1305,13 @@ private:
uint index;
R range;
bool _empty;
IterationStyle iterStyle;
StringStyle tokenStyle;
}
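// A hedged sketch of driving the range by hand, not part of the diff. Note that
// popFront() above returns the token it advances past rather than void, and
// front() throws if the range is already empty.
unittest
{
    auto src = "a = b;";
    auto tokens = byToken(src);
    while (!tokens.empty)
    {
        auto t = tokens.popFront();
        writeln(t.type, " ", t.value);
    }
}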
//Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyle.CODE_ONLY)
// if (isSomeString!S)
//{
// auto tokenAppender = appender!(Token[])();
//
// // This is very likely a local maximum, but it does seem to take a few
// // milliseconds off of the run time
// tokenAppender.reserve(inputString.length / 4);
//
// size_t endIndex = 0;
// uint lineNumber = 1;
//
// if (inputString.length > 1 && inputString[0..2] == "#!")
// {
// Token currentToken;
// currentToken.lineNumber = lineNumber; // lineNumber is always 1
// currentToken.value = lexScriptLine(inputString, endIndex, lineNumber);
// currentToken.type = TokenType.ScriptLine;
// }
//
// while (!isEoF(inputString, endIndex))
// {
// size_t prevIndex = endIndex;
// Token currentToken;
// auto startIndex = endIndex;
// if (isWhite(inputString[endIndex]))
// {
// if (iterationStyle == IterationStyle.EVERYTHING)
// {
// currentToken.lineNumber = lineNumber;
// currentToken.value = lexWhitespace(inputString, endIndex,
// lineNumber);
// currentToken.type = TokenType.Whitespace;
// tokenAppender.put(currentToken);
// }
// else
// lexWhitespace(inputString, endIndex, lineNumber);
// continue;
// }
// currentToken.startIndex = endIndex;
//
// outerSwitch: switch(inputString[endIndex])
// {
// mixin(generateCaseTrie(
// "=", "TokenType.Assign",
// "&", "TokenType.BitAnd",
// "&=", "TokenType.BitAndEquals",
// "|", "TokenType.BitOr",
// "|=", "TokenType.BitOrEquals",
// "~=", "TokenType.CatEquals",
// ":", "TokenType.Colon",
// ",", "TokenType.Comma",
// "$", "TokenType.Dollar",
// ".", "TokenType.Dot",
// "==", "TokenType.Equals",
// "=>", "TokenType.GoesTo",
// ">", "TokenType.Greater",
// ">=", "TokenType.GreaterEqual",
// "#", "TokenType.Hash",
// "&&", "TokenType.LogicAnd",
// "{", "TokenType.LBrace",
// "[", "TokenType.LBracket",
// "<", "TokenType.Less",
// "<=", "TokenType.LessEqual",
// "<>=", "TokenType.LessEqualGreater",
// "<>", "TokenType.LessOrGreater",
// "||", "TokenType.LogicOr",
// "(", "TokenType.LParen",
// "-", "TokenType.Minus",
// "-=", "TokenType.MinusEquals",
// "%", "TokenType.Mod",
// "%=", "TokenType.ModEquals",
// "*=", "TokenType.MulEquals",
// "!", "TokenType.Not",
// "!=", "TokenType.NotEquals",
// "!>", "TokenType.NotGreater",
// "!>=", "TokenType.NotGreaterEqual",
// "!<", "TokenType.NotLess",
// "!<=", "TokenType.NotLessEqual",
// "!<>", "TokenType.NotLessEqualGreater",
// "+", "TokenType.Plus",
// "+=", "TokenType.PlusEquals",
// "^^", "TokenType.Pow",
// "^^=", "TokenType.PowEquals",
// "}", "TokenType.RBrace",
// "]", "TokenType.RBracket",
// ")", "TokenType.RParen",
// ";", "TokenType.Semicolon",
// "<<", "TokenType.ShiftLeft",
// "<<=", "TokenType.ShiftLeftEqual",
// ">>", "TokenType.ShiftRight",
// ">>=", "TokenType.ShiftRightEqual",
// "..", "TokenType.Slice",
// "*", "TokenType.Star",
// "?", "TokenType.Ternary",
// "~", "TokenType.Tilde",
// "--", "TokenType.Decrement",
// "!<>=", "TokenType.Unordered",
// ">>>", "TokenType.UnsignedShiftRight",
// ">>>=", "TokenType.UnsignedShiftRightEqual",
// "++", "TokenType.Increment",
// "...", "TokenType.Vararg",
// "^", "TokenType.Xor",
// "^=", "TokenType.XorEquals",
// ));
// case '0': .. case '9':
// currentToken = lexNumber(inputString, endIndex);
// break;
// case '/':
// ++endIndex;
// if (isEoF(inputString, endIndex))
// {
// currentToken.value = "/";
// currentToken.type = TokenType.Div;
// currentToken.lineNumber = lineNumber;
// break;
// }
// currentToken.lineNumber = lineNumber;
// switch (inputString[endIndex])
// {
// case '/':
// case '+':
// case '*':
// if (iterationStyle == IterationStyle.CODE_ONLY)
// {
// lexComment(inputString, endIndex, lineNumber);
// continue;
// }
// else
// {
// currentToken.value = lexComment(inputString, endIndex, lineNumber);
// currentToken.type = TokenType.Comment;
// break;
// }
// case '=':
// currentToken.value = "/=";
// currentToken.type = TokenType.DivEquals;
// ++endIndex;
// break;
// default:
// currentToken.value = "/";
// currentToken.type = TokenType.Div;
// break;
// }
// break;
// case 'r':
// ++endIndex;
// if (isEoF(inputString, endIndex) || inputString[endIndex] != '"')
// goto default;
// currentToken.lineNumber = lineNumber;
// currentToken.value = lexString(inputString, endIndex,
// lineNumber, inputString[endIndex], false);
// currentToken.type = TokenType.StringLiteral;
// break;
// case '`':
// currentToken.lineNumber = lineNumber;
// currentToken.value = lexString(inputString, endIndex, lineNumber,
// inputString[endIndex], false);
// currentToken.type = TokenType.StringLiteral;
// break;
// case 'x':
// ++endIndex;
// if (isEoF(inputString, endIndex) || inputString[endIndex] != '"')
// goto default;
// else
// goto case '"'; // BUG: this is incorrect! according to specification, hex data should be lexed differently than "normal" strings
// case '\'':
// case '"':
// currentToken.lineNumber = lineNumber;
// currentToken.value = lexString(inputString, endIndex, lineNumber,
// inputString[endIndex]);
// currentToken.type = TokenType.StringLiteral;
// break;
// case 'q':
// currentToken.value = "q";
// ++endIndex;
// if (!isEoF(inputString, endIndex))
// {
// switch (inputString[endIndex])
// {
// case '"':
// currentToken.lineNumber = lineNumber;
// currentToken.value ~= lexDelimitedString(inputString,
// endIndex, lineNumber);
// currentToken.type = TokenType.StringLiteral;
// break outerSwitch;
// case '{':
// currentToken.lineNumber = lineNumber;
// currentToken.value ~= lexTokenString(inputString,
// endIndex, lineNumber);
// currentToken.type = TokenType.StringLiteral;
// break outerSwitch;
// default:
// break;
// }
// }
// goto default;
// case '@':
// ++endIndex;
// goto default;
// default:
// while(!isEoF(inputString, endIndex) && !isSeparating(inputString[endIndex]))
// ++endIndex;
// currentToken.value = inputString[startIndex .. endIndex];
// currentToken.type = lookupTokenTypeOptimized(currentToken.value);
// //currentToken.type = lookupTokenType(currentToken.value);
// currentToken.lineNumber = lineNumber;
// break;
// }
// //stderr.writeln(currentToken);
// tokenAppender.put(currentToken);
//
// // This should never happen.
// if (endIndex <= prevIndex)
// {
// stderr.writeln("FAIL");
// return [];
// }
// }
// return tokenAppender.data;
//}
unittest
{
auto c = ">><==>)(*)\"TestString\"if import ifire 0,10.4f `\n`@property void//comment\ntest/* comment *//+comment/+moar comment+/+/";
foreach (t; byToken(c))
writeln(t);
}