// Written in the D programming language

/**
 * This module contains a range-based lexer for the D programming language.
 *
 * Copyright: Brian Schott 2013
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
 * Authors: Brian Schott
 * Source: $(PHOBOSSRC std/d/_lexer.d)
 */

module std.d.lexer;

import std.range;
import std.traits;
import std.algorithm;
import std.conv;
import std.uni;
import std.ascii;
import std.exception;
import std.d.entities;

public:

/**
 * Represents a D token: its type, its text, and where it was found in the
 * source.
 */
struct Token
{
	/// The token type.
	TokenType type;

	/// The representation of the token in the original source code.
	string value;

	/// The number of the line the token is on.
	uint lineNumber;

	/// The character index of the start of the token in the original text.
	uint startIndex;

	/**
	 * Check to see if the token is of the same type and has the same string
	 * representation as the given token. Position information (line number
	 * and start index) is deliberately not compared.
	 */
	bool opEquals(ref const(Token) other) const
	{
		return other.type == type && other.value == value;
	}

	/**
	 * Checks to see if the token's string representation is equal to the given
	 * string.
	 */
	bool opEquals(string value) const { return this.value == value; }

	/**
	 * Checks to see if the token is of the given type.
	 */
	bool opEquals(TokenType type) const
	{
		// The parameter shadows the field, so the field has to be qualified
		// with `this`; comparing `type == type` would always be true.
		return this.type == type;
	}

	/**
	 * Comparison operator orders tokens by start index.
	 * Params:
	 *     i = the character index to compare the token's start index against
	 * Returns:
	 *     a negative value, zero or a positive value if the token starts
	 *     before, at or after i
	 */
	int opCmp(size_t i) const
	{
		if (startIndex < i) return -1;
		if (startIndex > i) return 1;
		return 0;
	}
}

/**
 * Bit flags that configure which kinds of tokens byToken() yields.
 */
enum IterationStyle
{
	/// Only include code, not whitespace or comments
	CodeOnly = 0,
	/// Includes comments
	IncludeComments = 1 << 0,
	/// Includes whitespace
	IncludeWhitespace = 1 << 1,
	/// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
	IncludeSpecialTokens = 1 << 2,
	/// Comments and whitespace (note: does not set IncludeSpecialTokens)
	Everything = IncludeWhitespace | IncludeComments
}

/**
 * Bit flags that configure how string literals are represented in the
 * value of the tokens produced by the lexer.
 */
enum StringStyle : uint
{
	/**
	 * Escape sequences will be replaced with their equivalent characters,
	 * enclosing quote characters will not be included. Useful for creating a
	 * compiler or interpreter.
	 */
	Default = 0,

	/**
	 * Escape sequences will not be processed. An escaped quote character will
	 * not terminate string lexing, but it will not be replaced with the quote
	 * character in the token.
	 */
	NotEscaped = 1 << 0,

	/**
	 * Strings will include their opening and closing quote characters as well
	 * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
	 * include the $(D_STRING 'w') character as well as the opening and closing
	 * quotes$(RPAREN)
	 */
	// The numeric value is 16 (hexadecimal 0x10), i.e. bit 4.
	IncludeQuotes = 1 << 4,

	/**
	 * Strings will be read exactly as they appeared in the source, including
	 * their opening and closing quote characters. Useful for syntax
	 * highlighting.
	 */
	Source = IncludeQuotes | NotEscaped,
}

/**
 * Creates a token range over the given characters and primes it so that the
 * first token (if any) is immediately available through front.
 * Params:
 *     range = the range of characters
 *     iterationStyle = See IterationStyle
 *     stringStyle = see StringStyle
 * Returns:
 *     an input range of tokens
 */
TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
	const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
{
	auto tokens = new TokenRange!(R)(range);

	// Line numbers are 1-based
	tokens.lineNumber = 1;
	tokens.iterStyle = iterationStyle;
	tokens.stringStyle = stringStyle;

	// Lex the first token so that front/empty are valid right away
	tokens.popFront();
	return tokens;
}

/**
 * Range of tokens. Lexes the wrapped character range lazily: every call to
 * popFront() consumes exactly as many characters as are needed to produce the
 * next token that the configured IterationStyle lets through.
 */
class TokenRange(R) : InputRange!(Token)
{
	/**
	 * Params:
	 *     range = the characters to lex
	 */
	this(ref R range)
	{
		this.range = range;
	}

	/**
	 * Returns: true if the range is empty
	 */
	override bool empty() const @property
	{
		return _empty;
	}

	/**
	 * Returns: the current token
	 * Throws: an exception (via enforce) if the range is empty
	 */
	override Token front() const @property
	{
		enforce(!_empty, "Cannot call front() on empty token range");
		return current;
	}

	/**
	 * Returns the current token and then removes it from the range
	 */
	override Token moveFront()
	{
		auto r = front();
		popFront();
		return r;
	}

	/**
	 * foreach support: calls dg with every remaining token until dg returns a
	 * non-zero value or the range is exhausted.
	 */
	override int opApply(int delegate(Token) dg)
	{
		int result = 0;
		while (!empty)
		{
			result = dg(front);
			if (result)
				break;
			popFront();
		}
		return result;
	}

	/**
	 * foreach support with a counter: calls dg with a zero-based running
	 * index and every remaining token until dg returns a non-zero value or
	 * the range is exhausted.
	 */
	override int opApply(int delegate(size_t, Token) dg)
	{
		int result = 0;
		size_t i = 0;
		while (!empty)
		{
			result = dg(i, front);
			if (result)
				break;
			popFront();
			// the counter has to advance together with the range
			++i;
		}
		return result;
	}

	/**
	 * Removes the current token from the range and lexes the next one.
	 *
	 * The range becomes empty when the input is exhausted or an end-of-file
	 * character (as recognised by isEoF) is reached.
	 */
	override void popFront()
	{
		if (range.isEoF())
		{
			_empty = true;
			return;
		}

		if (std.uni.isWhite(range.front))
		{
			// lexWhitespace consumes the whole run of whitespace at once
			if (iterStyle & IterationStyle.IncludeWhitespace)
			{
				current = lexWhitespace(range, index, lineNumber);
				return;
			}
			lexWhitespace(range, index, lineNumber);

			// The input may have ended with whitespace
			if (range.isEoF())
			{
				_empty = true;
				return;
			}
		}

		// Record the position only after whitespace has been skipped, so that
		// it refers to the first character of the token itself.
		current = Token.init;
		current.lineNumber = lineNumber;
		current.startIndex = index;

		outer: switch (range.front)
		{
		mixin(generateCaseTrie(
			"=",    "TokenType.Assign",
			"&",    "TokenType.BitAnd",
			"&=",   "TokenType.BitAndEquals",
			"|",    "TokenType.BitOr",
			"|=",   "TokenType.BitOrEquals",
			"~=",   "TokenType.CatEquals",
			":",    "TokenType.Colon",
			",",    "TokenType.Comma",
			"$",    "TokenType.Dollar",
			".",    "TokenType.Dot",
			"==",   "TokenType.Equals",
			"=>",   "TokenType.GoesTo",
			">",    "TokenType.Greater",
			">=",   "TokenType.GreaterEqual",
			"&&",   "TokenType.LogicAnd",
			"{",    "TokenType.LBrace",
			"[",    "TokenType.LBracket",
			"<",    "TokenType.Less",
			"<=",   "TokenType.LessEqual",
			"<>=",  "TokenType.LessEqualGreater",
			"<>",   "TokenType.LessOrGreater",
			"||",   "TokenType.LogicOr",
			"(",    "TokenType.LParen",
			"-",    "TokenType.Minus",
			"-=",   "TokenType.MinusEquals",
			"%",    "TokenType.Mod",
			"%=",   "TokenType.ModEquals",
			"*=",   "TokenType.MulEquals",
			"!",    "TokenType.Not",
			"!=",   "TokenType.NotEquals",
			"!>",   "TokenType.NotGreater",
			"!>=",  "TokenType.NotGreaterEqual",
			"!<",   "TokenType.NotLess",
			"!<=",  "TokenType.NotLessEqual",
			"!<>",  "TokenType.NotLessEqualGreater",
			"+",    "TokenType.Plus",
			"+=",   "TokenType.PlusEquals",
			"^^",   "TokenType.Pow",
			"^^=",  "TokenType.PowEquals",
			"}",    "TokenType.RBrace",
			"]",    "TokenType.RBracket",
			")",    "TokenType.RParen",
			";",    "TokenType.Semicolon",
			"<<",   "TokenType.ShiftLeft",
			"<<=",  "TokenType.ShiftLeftEqual",
			">>",   "TokenType.ShiftRight",
			">>=",  "TokenType.ShiftRightEqual",
			"..",   "TokenType.Slice",
			"*",    "TokenType.Star",
			"?",    "TokenType.Ternary",
			"~",    "TokenType.Tilde",
			"--",   "TokenType.Decrement",
			"!<>=", "TokenType.Unordered",
			">>>",  "TokenType.UnsignedShiftRight",
			">>>=", "TokenType.UnsignedShiftRightEqual",
			"++",   "TokenType.Increment",
			"...",  "TokenType.Vararg",
			"^",    "TokenType.Xor",
			"^=",   "TokenType.XorEquals",
			"@",    "TokenType.At",
		));
		case '0': .. case '9':
			current = lexNumber(range, index, lineNumber);
			break;
		case '\'':
		case '"':
			current = lexString(range, index, lineNumber, stringStyle);
			break;
		case '`':
			current = lexString(range, index, lineNumber, stringStyle);
			break;
		case 'q':
			// q{ starts a token string, q" a delimited string; anything else
			// is an ordinary identifier that happens to start with 'q'
			auto r = range.save;
			r.popFront();
			if (!r.isEoF() && r.front == '{')
			{
				current = lexTokenString(range, index, lineNumber, stringStyle);
				break;
			}
			else if (!r.isEoF() && r.front == '"')
			{
				current = lexDelimitedString(range, index, lineNumber,
					stringStyle);
				break;
			}
			else
				goto default;
		case '/':
			// Look one character ahead to tell comments, /= and / apart
			auto r = range.save();
			r.popFront();
			if (r.isEoF())
			{
				current.type = TokenType.Div;
				current.value = "/";
				range.popFront();
				++index;
				break;
			}
			switch (r.front)
			{
			case '/':
			case '*':
			case '+':
				current = lexComment(range, index, lineNumber);
				// When comments are filtered out, replace the comment with
				// the token that follows it (or become empty at the end).
				if (!(iterStyle & IterationStyle.IncludeComments))
					popFront();
				break outer;
			case '=':
				current.type = TokenType.DivEquals;
				current.value = "/=";
				range.popFront();
				range.popFront();
				index += 2;
				break outer;
			default:
				current.type = TokenType.Div;
				current.value = "/";
				++index;
				range.popFront();
				break outer;
			}
		case 'r':
			// r" starts a WYSIWYG string
			auto r = range.save();
			r.popFront();
			if (!r.isEoF() && r.front == '"')
			{
				current = lexString(range, index, lineNumber, stringStyle);
				break;
			}
			else
				goto default;
		case 'x':
			// x" starts a hex string
			auto r = range.save();
			r.popFront();
			if (!r.isEoF() && r.front == '"')
			{
				current = lexHexString(range, index, lineNumber);
				break;
			}
			else
				goto default;
		case '#':
			string special = lexSpecialTokenSequence(range, index, lineNumber);
			if (special)
			{
				current.type = TokenType.SpecialTokenSequence;
				current.value = special;
				// When special token sequences are filtered out, replace this
				// one with the token that follows it.
				if (!(iterStyle & IterationStyle.IncludeSpecialTokens))
					popFront();
			}
			else
			{
				current.type = TokenType.Hash;
				current.value = "#";
				range.popFront();
				++index;
			}
			break;
		default:
			// Identifier or keyword: everything up to the next separator
			auto app = appender!(ElementType!(R)[])();
			while(!range.isEoF() && !isSeparating(range.front))
			{
				app.put(range.front);
				range.popFront();
				++index;
			}
			current.value = to!string(app.data);
			current.type = lookupTokenType(current.value);
			break;
		}
	}

private:
	Token current;
	uint lineNumber;
	uint index;
	R range;
	bool _empty;
	IterationStyle iterStyle;
	StringStyle stringStyle;
}

unittest
{
    import std.stdio;
    auto a = "/**comment*/\n#lin #line 10 \"test.d\"\nint a;//test\n";
    foreach (t; byToken(a))
        writeln(t);
}

unittest
{
	// The indexed foreach must count the tokens
	size_t expected = 0;
	foreach (size_t i, Token t; byToken("a b c"))
	{
		assert (i == expected);
		++expected;
	}
	assert (expected == 3);

	// Trailing whitespace must end the range instead of reading past the end
	size_t count = 0;
	foreach (Token t; byToken("a \n"))
		++count;
	assert (count == 1);
}

/**
 * Listing of all the tokens in the D language.
 *
 * Token types are arranged so that it is easy to group tokens while iterating
 * over them. For example:
 * ---
 * assert(TokenType.Increment < TokenType.OPERATORS_END);
 * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
 * ---
 * The non-token values are documented below:
 *
 * $(BOOKTABLE ,
 *     $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
 *     $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
 *     $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
 *     $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
 *     $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override, synchronized, __gshared))
 *     $(TR $(TD PROTECTION_BEGIN) $(TD PROTECTION_END) $(TD protection) $(TD public, protected))
 *     $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
 *     $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
 *     $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
 *     $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
 *     $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
 * )
 * Note that several of the above ranges overlap. Also note that Template is
 * declared between TYPES_END and KEYWORDS_BEGIN and therefore falls into
 * neither of those two ranges.
 */
enum TokenType: uint
{
	// Operators
	OPERATORS_BEGIN, ///
	Assign,	/// =
	At, /// @
	BitAnd,	/// &
	BitAndEquals,	/// &=
	BitOr,	/// |
	BitOrEquals,	/// |=
	CatEquals,	/// ~=
	Colon,	/// :
	Comma,	/// ,
	Decrement,	/// --
	Div,	/// /
	DivEquals,	/// /=
	Dollar,	/// $
	Dot,	/// .
	Equals,	/// ==
	GoesTo, /// =>
	Greater,	/// >
	GreaterEqual,	/// >=
	Hash, /// #
	Increment,	/// ++
	LBrace,	/// {
	LBracket,	/// [
	Less,	/// <
	LessEqual,	/// <=
	LessEqualGreater, /// <>=
	LessOrGreater,	/// <>
	LogicAnd,	/// &&
	LogicOr,	/// ||
	LParen,	/// $(LPAREN)
	Minus,	/// -
	MinusEquals,	/// -=
	Mod,	/// %
	ModEquals,	/// %=
	MulEquals,	/// *=
	Not,	/// !
	NotEquals,	/// !=
	NotGreater,	/// !>
	NotGreaterEqual,	/// !>=
	NotLess,	/// !<
	NotLessEqual,	/// !<=
	NotLessEqualGreater,	/// !<>
	Plus,	/// +
	PlusEquals,	/// +=
	Pow,	/// ^^
	PowEquals,	/// ^^=
	RBrace,	/// }
	RBracket,	/// ]
	RParen,	/// $(RPAREN)
	Semicolon,	/// ;
	ShiftLeft,	/// <<
	ShiftLeftEqual,	/// <<=
	ShiftRight,	/// >>
	ShiftRightEqual,	/// >>=
	Slice, /// ..
	Star,	/// *
	Ternary,	/// ?
	Tilde,	/// ~
	Unordered,	/// !<>=
	UnsignedShiftRight,	/// >>>
	UnsignedShiftRightEqual,	/// >>>=
	Vararg,	/// ...
	Xor,	/// ^
	XorEquals,	/// ^=
	OPERATORS_END, ///

	// Types
	TYPES_BEGIN, ///
	Bool, /// bool
	Byte, /// byte
	Cdouble, /// cdouble
	Cent, /// cent
	Cfloat, /// cfloat
	Char, /// char
	Creal, /// creal
	Dchar, /// dchar
	Double, /// double
	DString, /// dstring
	Float, /// float
	Function, /// function
	Idouble, /// idouble
	Ifloat, /// ifloat
	Int, /// int
	Ireal, /// ireal
	Long, /// long
	Real, /// real
	Short, /// short
	String, /// string
	Ubyte, /// ubyte
	Ucent, /// ucent
	Uint, /// uint
	Ulong, /// ulong
	Ushort, /// ushort
	Void, /// void
	Wchar, /// wchar
	WString, /// wstring
	TYPES_END, ///

	Template, /// template

	// Keywords
	KEYWORDS_BEGIN, ///
		ATTRIBUTES_BEGIN, ///
		Align, /// align
		Deprecated, /// deprecated
		Extern, /// extern
		Pragma, /// pragma
			PROTECTION_BEGIN, ///
			Export, /// export
			Package, /// package
			Private, /// private
			Protected, /// protected
			Public, /// public
			PROTECTION_END, ///
		Abstract, /// abstract
		Auto, /// auto
		Const, /// const
		Final, /// final
		Gshared, /// __gshared
		Immutable, /// immutable
		Inout, /// inout
		Scope, /// scope
		Shared, /// shared
		Static, /// static
		Synchronized, /// synchronized
		ATTRIBUTES_END, ///
	Alias, /// alias
	Asm, /// asm
	Assert, /// assert
	Body, /// body
	Break, /// break
	Case, /// case
	Cast, /// cast
	Catch, /// catch
	Class, /// class
	Continue, /// continue
	Debug, /// debug
	Default, /// default
	Delegate, /// delegate
	Delete, /// delete
	Do, /// do
	Else, /// else
	Enum, /// enum
	False, /// false
	Finally, /// finally
	Foreach, /// foreach
	Foreach_reverse, /// foreach_reverse
	For, /// for
	Goto, /// goto
	If, /// if
	Import, /// import
	In, /// in
	Interface, /// interface
	Invariant, /// invariant
	Is, /// is
	Lazy, /// lazy
	Macro, /// macro
	Mixin, /// mixin
	Module, /// module
	New, /// new
	Nothrow, /// nothrow
	Null, /// null
	Out, /// out
	Override, /// override
	Pure, /// pure
	Ref, /// ref
	Return, /// return
	Struct, /// struct
	Super, /// super
	Switch, /// switch
	This, /// this
	Throw, /// throw
	True, /// true
	Try, /// try
	Typedef, /// typedef
	Typeid, /// typeid
	Typeof, /// typeof
	Union, /// union
	Unittest, /// unittest
	Version, /// version
	Volatile, /// volatile
	While, /// while
	With, /// with
	KEYWORDS_END, ///

	// Constants
	CONSTANTS_BEGIN, ///
	File, /// __FILE__
	Line, /// __LINE__
	Thread, /// __thread
	Traits, /// __traits
	CONSTANTS_END, ///

	// Misc
	MISC_BEGIN, ///
	Comment, /// /** comment */ or // comment or ///comment
	Identifier, /// anything else
	ScriptLine, /// Line at the beginning of source file that starts from #!
	Whitespace, /// whitespace
    SpecialTokenSequence, /// #line 10 "file.d"
	MISC_END, ///

	// Literals
	LITERALS_BEGIN, ///
		NUMBERS_BEGIN, ///
		DoubleLiteral, /// 123.456
		FloatLiteral, /// 123.456f or 0x123_45p-af
		IDoubleLiteral, /// 123.456i
		IFloatLiteral, /// 123.456fi
		IntLiteral, /// 123 or 0b1101010101
		LongLiteral, /// 123L
		RealLiteral, /// 123.456L
		IRealLiteral, /// 123.456Li
		UnsignedIntLiteral, /// 123u
		UnsignedLongLiteral, /// 123uL
		NUMBERS_END, ///
		STRINGS_BEGIN, ///
		DStringLiteral, /// "32-bit character string"d
		StringLiteral, /// "a string"
		WStringLiteral, /// "16-bit character string"w
		STRINGS_END, ///
	LITERALS_END, ///
}

// Implementation details follow
private:

// Returns true if the first character of the (non-empty) range is a line
// feed or a carriage return.
private pure bool isNewline(R)(R range)
{
	if (range.front == '\n')
		return true;
	return range.front == '\r';
}

// Returns true if the range is exhausted or its first character is one of
// the end-of-file markers \0 and \x1a.
pure bool isEoF(R)(R range)
{
	if (range.empty)
		return true;
	return range.front == 0 || range.front == 0x1a;
}

// Removes one line ending ("\r", "\n" or "\r\n") from the front of the range
// and returns the removed characters. index is advanced by the number of
// characters removed. Returns an empty array if the range does not start
// with a line ending (including when the range is empty).
C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index)
	if (isSomeChar!C && isForwardRange!R)
{
	C[] chars;
	if (!range.empty && range.front == '\r')
	{
		chars ~= range.front;
		range.popFront();
		++index;
	}
	// The range may have ended right after a lone carriage return
	if (!range.empty && range.front == '\n')
	{
		chars ~= range.front;
		range.popFront();
		++index;
	}
	return chars;
}

unittest
{
	uint i;
	auto s = "\r\ntest";
	assert (popNewline(s, i) == "\r\n");
	assert (s == "test");
	assert (i == 2);

	// A lone carriage return at the very end of the input
	uint j;
	auto cr = "\r";
	assert (popNewline(cr, j) == "\r");
	assert (cr.empty);
	assert (j == 1);

	// Nothing to remove
	uint k;
	auto none = "";
	assert (popNewline(none, k).length == 0);
	assert (k == 0);
}

// Consumes the run of whitespace at the front of the range and returns it as
// a Whitespace token. index is advanced by the number of characters consumed
// and lineNumber by the number of line endings ("\r\n" counts once).
Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index,
	ref uint lineNumber) if (isForwardRange!R && isSomeChar!C)
{
	Token ws;
	ws.startIndex = index;
	ws.lineNumber = lineNumber;
	ws.type = TokenType.Whitespace;

	auto text = appender!(C[])();
	for (;;)
	{
		if (isEoF(range) || !std.uni.isWhite(range.front))
			break;

		if (!isNewline(range))
		{
			text.put(range.front);
			range.popFront();
			++index;
			continue;
		}

		// popNewline takes care of advancing index for line endings
		++lineNumber;
		text.put(popNewline(range, index));
	}

	ws.value = to!string(text.data);
	return ws;
}

unittest
{
	import std.stdio;
	uint line = 1;
	uint position;
	auto source = " \n \r\n \tabcde";
	auto token = lexWhitespace(source, position, line);
	assert (token.value == " \n \r\n \t");
	assert (source == "abcde");
	assert (line == 3);
}

// Lexes a line comment (//), a block comment (/* */) or a nesting block
// comment (/+ +/) from the front of the input, which must start with '/'.
//
// index is advanced by the number of characters consumed and lineNumber by
// the number of line endings inside the comment. The terminating line ending
// of a line comment is not consumed. Unterminated block comments extend to
// the end of the input.
//
// Returns a Comment token holding the comment text, or a default-initialised
// token if the '/' is not followed by '/', '*' or '+' (the '/' has been
// consumed in that case).
Token lexComment(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber)
	if (isSomeChar!C && isForwardRange!R)
in
{
	assert (input.front == '/');
}
body
{
	Token t;
	t.lineNumber = lineNumber;
	t.type = TokenType.Comment;
	t.startIndex = index;
	auto app = appender!(C[])();

	// Moves the current character into the comment text
	void consume()
	{
		app.put(input.front);
		input.popFront();
		++index;
	}

	consume(); // the leading '/'
	if (isEoF(input))
	{
		Token errorToken;
		return errorToken;
	}

	switch(input.front)
	{
	case '/':
		while (!isEoF(input) && !isNewline(input))
			consume();
		break;
	case '*':
		// Consume the opening '*' up front so that it cannot be mistaken
		// for the start of the closing "*/" (as in "/*/").
		consume();
		while (!isEoF(input))
		{
			if (isNewline(input))
			{
				app.put(popNewline(input, index));
				++lineNumber;
			}
			else if (input.front == '*')
			{
				consume();
				if (!isEoF(input) && input.front == '/')
				{
					consume();
					break;
				}
			}
			else
				consume();
		}
		break;
	case '+':
		// Same as above: the opening '+' must not double as a closer
		consume();
		int depth = 1;
		while (depth > 0 && !isEoF(input))
		{
			if (isNewline(input))
			{
				app.put(popNewline(input, index));
				lineNumber++;
			}
			else if (input.front == '+')
			{
				consume();
				if (!isEoF(input) && input.front == '/')
				{
					consume();
					--depth;
				}
			}
			else if (input.front == '/')
			{
				consume();
				if (!isEoF(input) && input.front == '+')
				{
					consume();
					++depth;
				}
			}
			else
				consume();
		}
		break;
	default:
		Token errorToken;
		return errorToken;
	}
	t.value = to!string(app.data);
	return t;
}

unittest
{
	uint index;
	uint lineNumber = 1;
	auto chars = "//this is a comment\r\nthis is not";
	auto comment = lexComment(chars, index, lineNumber);
	assert (chars == "\r\nthis is not");
	assert (comment.value == "//this is a comment");
	assert (index == comment.value.length);
}

unittest
{
	uint index = 0;
	uint lineNumber = 1;
	auto chars = "/* this is a\n\tcomment\r\n */this is not";
	auto comment = lexComment(chars, index, lineNumber);
	assert (chars == "this is not");
	assert (comment.value == "/* this is a\n\tcomment\r\n */");
	assert (lineNumber == 3);
}

unittest
{
	uint index;
	uint lineNumber = 1;
	auto chars = "/+this is a /+c/+omm+/ent+/ \r\nthis+/ is not";
	auto comment = lexComment(chars, index, lineNumber);
	assert (chars == " is not");
	assert (comment.value == "/+this is a /+c/+omm+/ent+/ \r\nthis+/");
	assert (lineNumber == 2);
}

unittest
{
	uint i;
	uint l;
	auto chars = "/(";
	auto comment = lexComment(chars, i, l);
	assert (comment == "");
}

unittest
{
	// "/*/" and "/+/" only open a comment, they do not close it
	uint index;
	uint lineNumber = 1;
	auto block = "/*/ still a comment */rest";
	auto blockComment = lexComment(block, index, lineNumber);
	assert (blockComment.value == "/*/ still a comment */");
	assert (block == "rest");
	assert (index == blockComment.value.length);

	uint nestedIndex;
	auto nested = "/+/ still a comment +/rest";
	auto nestedComment = lexComment(nested, nestedIndex, lineNumber);
	assert (nestedComment.value == "/+/ still a comment +/");
	assert (nested == "rest");
}

unittest
{
	// Unterminated comments run to the end of the input
	uint index;
	uint lineNumber = 1;
	auto chars = "/* open *";
	auto comment = lexComment(chars, index, lineNumber);
	assert (comment.value == "/* open *");
	assert (chars.empty);
}

// Removes up to upTo characters that satisfy isInterestingDigit from the
// front of the input and returns them as a string. Stops early at the first
// character that does not satisfy the predicate or at the end of the input.
// index is advanced by the number of characters removed.
string popDigitChars(R, C = ElementType!R, alias isInterestingDigit)(ref R input, ref uint index,
	uint upTo) if (isSomeChar!C && isForwardRange!R)
{
	auto app = appender!(C[])();
	for (uint i = 0; i != upTo && !input.empty && isInterestingDigit(input.front); ++i)
	{
		app.put(input.front);
		input.popFront();
		++index;
	}
	return to!string(app.data);
}

// Removes up to upTo hexadecimal digits from the front of the input.
string popHexChars(R)(ref R input, ref uint index, uint upTo)
{
	return popDigitChars!(R, ElementType!R, isHexDigit)(input, index, upTo);
}

// Removes up to upTo octal digits from the front of the input.
string popOctalChars(R)(ref R input, ref uint index, uint upTo)
{
	return popDigitChars!(R, ElementType!R, isOctalDigit)(input, index, upTo);
}

unittest
{
	uint i;
	auto a = "124ac82d3fqwerty";
	auto ra = popHexChars(a, i, uint.max);
	assert (a == "qwerty");
	assert (ra == "124ac82d3f");
	assert (i == 10);
	auto b = "08a7c2e3";
	auto rb = popHexChars(b, i, 4);
	assert (rb.length == 4);
	assert (rb == "08a7");
	assert (b == "c2e3");
	assert (i == 14);
	auto c = "00123832";
	auto rc = popOctalChars(c, i, uint.max);
	assert (c == "832");
	assert (rc == "00123");

	// Running out of input before reaching upTo is not an error
	uint j;
	auto d = "1f";
	assert (popHexChars(d, j, 4) == "1f");
	assert (d.empty);
	assert (j == 2);
}

// Consumes one escape sequence from the front of the input, which must start
// with a backslash, and returns the text it stands for.
//
// Supported forms: simple escapes (\n, \t, \\, ...), \xHH, octal (up to three
// digits), \uHHHH, \UHHHHHHHH and named character entities (\&name;).
// Unknown entities yield an empty string. Malformed sequences (unknown escape
// character, missing digits, input ending after the backslash) yield a
// single backslash.
//
// index is advanced for the backslash and the characters consumed here; the
// digits of numeric escapes are consumed through popHexChars/popOctalChars.
string interpretEscapeSequence(R, C = ElementType!R)(ref R input, ref uint index)
	if (isSomeChar!C && isForwardRange!R)
in
{
	assert(input.front == '\\');
}
body
{
	// Converts a run of digits into the character with that code point
	static string decode(string digits, uint radix)
	{
		if (digits.length == 0)
			return "\\"; // error: no digits after the escape selector
		return to!string(cast(dchar) parse!uint(digits, radix));
	}

	input.popFront(); // the backslash
	++index;
	if (input.empty)
		return "\\"; // error: nothing follows the backslash

	switch (input.front)
	{
	case '\'':
	case '\"':
	case '?':
	case '\\':
	case 0:
	case 0x1a:
		auto f = input.front;
		input.popFront();
		++index;
		return to!string(f);
	case 'a': input.popFront(); ++index; return "\a";
	case 'b': input.popFront(); ++index; return "\b";
	case 'f': input.popFront(); ++index; return "\f";
	case 'n': input.popFront(); ++index; return "\n";
	case 'r': input.popFront(); ++index; return "\r";
	case 't': input.popFront(); ++index; return "\t";
	case 'v': input.popFront(); ++index; return "\v";
	case 'x':
		input.popFront();
		++index;
		return decode(popHexChars(input, index, 2), 16);
	case '0': .. case '7':
		// The first octal digit is still at the front of the input
		return decode(popOctalChars(input, index, 3), 8);
	case 'u':
		input.popFront();
		++index;
		return decode(popHexChars(input, index, 4), 16);
	case 'U':
		input.popFront();
		++index;
		return decode(popHexChars(input, index, 8), 16);
	case '&':
		input.popFront();
		++index;
		auto entity = appender!(char[])();
		while (!input.isEoF() && input.front != ';')
		{
			entity.put(input.front);
			input.popFront();
			++index;
		}
		if (!isEoF(input))
		{
			auto decoded = to!string(entity.data) in characterEntities;
			input.popFront(); // the terminating ';'
			++index;
			if (decoded !is null)
				return to!string(*decoded);
		}
		return "";
	default:
		input.popFront();
		++index;
		// This is an error
		return "\\";
	}
}

unittest
{
	uint i;
	auto vals = [
		"\\&amp;": "&",
		"\\n": "\n",
		"\\?": "?",
		"\\u0033": "\u0033",
		"\\U00000076": "v",
		"\\075": "=",
		"\\'": "'",
		"\\a": "\a",
		"\\b": "\b",
		"\\f": "\f",
		"\\r": "\r",
		"\\t": "\t",
		"\\v": "\v",
		"\\y": "\\",
		"\\x20": " ",
		"\\&eeeeeeror;": "",
	];
	foreach (k, v; vals)
		assert (interpretEscapeSequence(k, i) == v);

	// Malformed sequences are reported as a single backslash
	auto lonely = "\\";
	assert (interpretEscapeSequence(lonely, i) == "\\");
	auto noDigits = "\\xzz";
	assert (interpretEscapeSequence(noDigits, i) == "\\");
	assert (noDigits == "zz");
}

// Lexes a hex string literal (x"...") from the front of the input, which
// must start with 'x' followed by the opening quote.
//
// With StringStyle.NotEscaped the token value is the text between the quotes
// exactly as written; otherwise every pair of hex digits is converted to the
// character with that code (a trailing single digit is converted on its
// own). With StringStyle.IncludeQuotes the x" prefix, the closing quote and
// the optional c/w/d suffix are included in the value.
//
// Whitespace and line endings inside the literal are skipped when decoding.
// Any other non-hex character is an error and is skipped as well. An
// unterminated literal extends to the end of the input.
Token lexHexString(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber,
	const StringStyle style = StringStyle.Default)
in
{
	assert (input.front == 'x');
}
body
{
	Token t;
	t.lineNumber = lineNumber;
	t.startIndex = index;
	t.type = TokenType.StringLiteral;

	immutable bool keepQuotes = (style & StringStyle.IncludeQuotes) != 0;
	immutable bool keepSource = (style & StringStyle.NotEscaped) != 0;

	// The literal's content as written, and the hex digits found in it
	auto raw = appender!(C[])();
	auto digits = appender!(char[])();

	input.popFront(); // x
	input.popFront(); // "
	index += 2;

	bool closed = false;
	while (!input.isEoF())
	{
		if (isNewline(input))
		{
			raw.put(popNewline(input, index));
			++lineNumber;
			continue;
		}

		// Every iteration consumes at least one character, so malformed
		// input cannot stall the loop.
		auto c = input.front;
		input.popFront();
		++index;
		if (c == '"')
		{
			closed = true;
			break;
		}
		raw.put(c);
		if (isHexDigit(c))
			digits.put(c);
		// Whitespace is allowed between digits; anything else is an error
		// that is only preserved in the raw text.
	}

	auto result = appender!(char[])();
	if (keepQuotes)
		result.put("x\"");
	if (keepSource)
		result.put(to!string(raw.data));
	else
	{
		// Hex digits are ASCII, so the string can be sliced in pairs
		auto hex = to!string(digits.data);
		for (size_t p = 0; p < hex.length; p += 2)
		{
			auto pair = hex[p .. min(p + 2, hex.length)];
			result.put(to!string(cast(dchar) parse!uint(pair, 16)));
		}
	}
	if (keepQuotes && closed)
		result.put('"');

	if (!input.isEoF())
	{
		switch (input.front)
		{
		case 'w':
			t.type = TokenType.WStringLiteral;
			goto case 'c';
		case 'd':
			t.type = TokenType.DStringLiteral;
			goto case 'c';
		case 'c':
			if (keepQuotes)
				result.put(input.front);
			input.popFront();
			++index;
			break;
		default:
			break;
		}
	}

	t.value = to!string(result.data);
	return t;
}

unittest
{
    uint i;
    uint l;

    auto a = `x"204041"`;
    auto ar = lexHexString(a, i, l);
    assert (ar == " @A");
    assert (ar == TokenType.StringLiteral);

    auto b = `x"20"w`;
    auto br = lexHexString(b, i, l);
    assert (br == " ");
    assert (br == TokenType.WStringLiteral);

    auto c = `x"6d"`;
    auto cr = lexHexString(c, i, l, StringStyle.NotEscaped);
    assert (cr == "6d");

    auto d = `x"5e5f"d`;
    auto dr = lexHexString(d, i, l, StringStyle.NotEscaped | StringStyle.IncludeQuotes);
    assert (dr == `x"5e5f"d`);
    assert (dr == TokenType.DStringLiteral);

    // Whitespace between digits is skipped when decoding
    auto e = `x"20 41"`;
    auto er = lexHexString(e, i, l);
    assert (er == " A");
    assert (e.empty);

    // Invalid characters are skipped instead of stalling the lexer
    auto f = `x"2z0"rest`;
    auto fr = lexHexString(f, i, l);
    assert (fr == " ");
    assert (f == "rest");
}

// Lexes a quoted literal from the front of the input: a double-quoted
// string, a single-quoted character literal, or a WYSIWYG string (`...` or
// r"...").
//
// Escape sequences are interpreted unless StringStyle.NotEscaped is set.
// WYSIWYG strings have no escape sequences, so a backslash inside them is
// always an ordinary character. With StringStyle.IncludeQuotes the r prefix,
// the quotes and the optional c/w/d suffix are part of the token value.
//
// index is advanced by the number of characters consumed and lineNumber by
// the number of line endings inside the literal. An unterminated literal
// extends to the end of the input.
Token lexString(R)(ref R input, ref uint index, ref uint lineNumber,
	const StringStyle style = StringStyle.Default)
in
{
	assert (input.front == '\'' || input.front == '"' || input.front == '`' || input.front == 'r');
}
body
{
	Token t;
	t.lineNumber = lineNumber;
	t.startIndex = index;
	t.type = TokenType.StringLiteral;
	auto app = appender!(char[])();
	bool isWysiwyg = input.front == 'r' || input.front == '`';
	if (input.front == 'r')
	{
		if (style & StringStyle.IncludeQuotes)
			app.put('r');
		input.popFront();
		++index;
	}
	auto quote = input.front;
	input.popFront();
	++index;

	if (style & StringStyle.IncludeQuotes)
		app.put(quote);
	while (!isEoF(input))
	{
		if (isNewline(input))
		{
			app.put(popNewline(input, index));
			lineNumber++;
		}
		else if (input.front == '\\' && !isWysiwyg)
		{
			if (style & StringStyle.NotEscaped)
			{
				// Keep the escape as written. An escaped quote or backslash
				// is copied as a pair so that the quote does not end the
				// literal and the second backslash does not start an escape.
				auto r = input.save();
				r.popFront();
				if (!r.isEoF() && (r.front == quote || r.front == '\\'))
				{
					app.put('\\');
					app.put(r.front);
					input.popFront();
					input.popFront();
					index += 2;
				}
				else
				{
					app.put('\\');
					input.popFront();
					++index;
				}
			}
			else
				app.put(interpretEscapeSequence(input, index));
		}
		else if (input.front == quote)
		{
			if (style & StringStyle.IncludeQuotes)
				app.put(quote);
			input.popFront();
			++index;
			break;
		}
		else
		{
			app.put(input.front);
			input.popFront();
			++index;
		}
	}
	if (!input.isEoF())
	{
		switch (input.front)
		{
		case 'w':
			t.type = TokenType.WStringLiteral;
			goto case 'c';
		case 'd':
			t.type = TokenType.DStringLiteral;
			goto case 'c';
		case 'c':
			if (style & StringStyle.IncludeQuotes)
				app.put(input.front);
			input.popFront();
			++index;
			break;
		default:
			break;
		}
	}
	t.value = to!string(app.data);
	return t;
}

unittest
{
	uint l = 1;
	uint i;
	auto a = `"abcde"`;
	assert (lexString(a, i, l) == "abcde");
	auto b = "\"ab\\ncd\"";
	assert (lexString(b, i, l) == "ab\ncd");
	auto c = "`abc\\ndef`";
	assert (lexString(c, i, l, StringStyle.NotEscaped) == "abc\\ndef");
	auto d = `"12345"w`;
	assert (lexString(d, i, l).type == TokenType.WStringLiteral);
	auto e = `"abc"c`;
	assert (lexString(e, i, l).type == TokenType.StringLiteral);
	auto f = `"abc"d`;
	assert (lexString(f, i, l).type == TokenType.DStringLiteral);
	auto g = "\"a\nb\"";
	assert (lexString(g, i, l) == "a\nb");

	// WYSIWYG strings never interpret escape sequences
	auto h = "r\"a\\nb\"";
	assert (lexString(h, i, l) == "a\\nb");
	auto k = "`a\\nb`";
	assert (lexString(k, i, l) == "a\\nb");

	// The r prefix counts towards the index
	uint j;
	auto m = `r"ab"`;
	lexString(m, j, l);
	assert (j == 5);
}

// Lexes a delimited string literal (q"...") from the front of the input,
// which must start with 'q' followed by the opening quote.
//
// Two forms are handled:
//   * bracket form: q"(...)", q"[...]", q"{...}" or q"<...>", where nested
//     delimiter pairs are part of the string;
//   * heredoc form: q"IDENT, a line ending, the text, and IDENT" at the end.
//     The returned text is everything after the first line ending up to, but
//     not including, the closing identifier.
//
// With StringStyle.IncludeQuotes the value contains the literal as written,
// including q", the delimiters or identifier, the closing quote and the
// optional c/w/d suffix.
//
// index is advanced by the number of characters consumed and lineNumber by
// the number of line endings. Unterminated literals extend to the end of the
// input.
Token lexDelimitedString(R)(ref R input, ref uint index,
	ref uint lineNumber, const StringStyle stringStyle = StringStyle.Default)
in
{
	assert(input.front == 'q');
}
body
{
	auto app = appender!(ElementType!R[])();
	Token t;
	t.startIndex = index;
	t.lineNumber = lineNumber;
	t.type = TokenType.StringLiteral;

	immutable bool includeQuotes = (stringStyle & StringStyle.IncludeQuotes) != 0;

	input.popFront(); // q
	input.popFront(); // "
	index += 2;
	if (includeQuotes)
	{
		app.put('q');
		app.put('"');
	}

	bool heredoc;
	ElementType!R open;
	ElementType!R close;

	switch (input.front)
	{
	case '[': open = '['; close = ']'; break;
	case '{': open = '{'; close = '}'; break;
	case '(': open = '('; close = ')'; break;
	case '<': open = '<'; close = '>'; break;
	default: heredoc = true; break;
	}

	if (heredoc)
	{
		// The identifier runs up to the first whitespace character
		auto hereOpen = appender!(ElementType!(R)[])();
		while (!input.isEoF() && !std.uni.isWhite(input.front))
		{
			hereOpen.put(input.front);
			input.popFront();
			++index;
		}
		if (includeQuotes)
			app.put(hereOpen.data);

		if (!input.isEoF() && input.isNewline())
		{
			++lineNumber;
			auto lineEnding = input.popNewline(index);
			if (includeQuotes)
				app.put(lineEnding);
		}
//		else
//			this is an error

		// The text is collected separately so that the search for the
		// closing identifier cannot match the opening one.
		auto text = appender!(ElementType!(R)[])();
		bool closed = false;
		while (!input.isEoF())
		{
			if (isNewline(input))
			{
				++lineNumber;
				text.put(input.popNewline(index));
			}
			else if (input.front == '"' && text.data.endsWith(hereOpen.data))
			{
				++index;
				input.popFront();
				closed = true;
				break;
			}
			else
			{
				text.put(input.front);
				++index;
				input.popFront();
			}
		}

		if (includeQuotes)
		{
			app.put(text.data);
			if (closed)
				app.put('"');
		}
		else if (closed)
			app.put(text.data[0 .. text.data.length - hereOpen.data.length]);
		else
			app.put(text.data);
	}
	else
	{
		if (includeQuotes)
			app.put(input.front);
		input.popFront(); // opening delimiter
		++index;
		int depth = 1;
		while (depth > 0 && !input.isEoF())
		{
			if (isNewline(input))
			{
				++lineNumber;
				app.put(popNewline(input, index));
			}
			else
			{
				if (input.front == close)
				{
					--depth;
					if (depth == 0)
					{
						input.popFront(); // closing delimiter
						++index;
						if (includeQuotes)
							app.put(close);
						// The closing quote is only consumed if it is
						// actually there.
						if (!input.isEoF() && input.front == '"')
						{
							input.popFront();
							++index;
							if (includeQuotes)
								app.put('"');
						}
						break;
					}
				}
				else if (input.front == open)
					++depth;
				app.put(input.front);
				input.popFront();
				++index;
			}
		}
	}
	if (!input.isEoF())
	{
		switch (input.front)
		{
		case 'w':
			t.type = TokenType.WStringLiteral;
			goto case 'c';
		case 'd':
			t.type = TokenType.DStringLiteral;
			goto case 'c';
		case 'c':
			if (includeQuotes)
				app.put(input.front);
			input.popFront();
			++index;
			break;
		default:
			break;
		}
	}
	t.value = to!string(app.data);
	return t;
}

// Exercises lexDelimitedString on a nesting-delimiter string, a heredoc string
// with a suffix, and a nesting-delimiter string lexed with StringStyle.Source.
// Token types are compared through the .type field so that the assertions do
// not depend on Token's opEquals(TokenType) overload.
unittest
{
	uint i;
	uint l;
	auto a = `q"{abc{}de}"`;
	auto ar = lexDelimitedString(a, i, l);
	assert (ar == "abc{}de");
	assert (ar.type == TokenType.StringLiteral);

	auto b = "q\"abcde\n123\nabcde\"w";
	auto br = lexDelimitedString(b, i, l);
	assert (br == "123\n");
	assert (br.type == TokenType.WStringLiteral);

	auto c = `q"[<xml></xml>]");`;
	auto cr = lexDelimitedString(c, i, l, StringStyle.Source);
	assert (cr == `q"[<xml></xml>]"`);
	assert (cr.type == TokenType.StringLiteral);
}

/**
 * Lexes a token string literal ($(D q{ ... })).
 *
 * The body is tokenized with byToken on a copy of the input so that braces
 * inside nested literals or comments do not affect the brace depth; the
 * original input is then advanced by the number of elements that were lexed.
 *
 * Params:
 *     input = the source range, positioned on the leading 'q'; it is advanced
 *         past the closing brace and an optional c/w/d suffix
 *     index = running count of elements consumed; advanced by the same amount
 *         as input
 *     lineNumber = line recorded in the returned token
 *     stringStyle = if StringStyle.IncludeQuotes is set, the token value keeps
 *         the q{ prefix, the closing brace and the suffix
 * Returns: a token whose type is StringLiteral, WStringLiteral or
 *     DStringLiteral depending on the suffix
 */
Token lexTokenString(R)(ref R input, ref uint index, ref uint lineNumber,
	const StringStyle stringStyle = StringStyle.Default)
in
{
	assert (input.front == 'q');
}
body
{
	Token t;
	t.startIndex = index;
	t.type = TokenType.StringLiteral;
	t.lineNumber = lineNumber;
	immutable bool includeQuotes =
		(stringStyle & StringStyle.IncludeQuotes) ? true : false;
	auto app = appender!(ElementType!(R)[])();
	input.popFront(); // q
	input.popFront(); // {
	index += 2;
	if (includeQuotes)
	{
		app.put('q');
		app.put('{');
	}
	auto r = byToken(input, IterationStyle.Everything, StringStyle.Source);
	int depth = 1;
	bool closed = false; // true once the matching closing brace was lexed
	while (!r.empty)
	{
		if (r.front.type == TokenType.LBrace)
		{
			++depth;
		}
		else if (r.front.type == TokenType.RBrace)
		{
			--depth;
			if (depth <= 0)
			{
				if (includeQuotes)
					app.put('}');
				closed = true;
				r.popFront();
				break;
			}
		}
		app.put(r.front.value);
		r.popFront();
	}

	// Number of input elements covered by the body plus the closing brace.
	size_t n = app.data.length;
	if (includeQuotes)
		n -= 2; // q{ was already popped above; the stored } is still counted
	else if (closed)
		++n; // the closing brace was lexed but is not stored in app
	input.popFrontN(n);
	index += cast(uint) n;
	if (!input.isEoF())
	{
		switch (input.front)
		{
		case 'w':
			t.type = TokenType.WStringLiteral;
			goto case 'c';
		case 'd':
			t.type = TokenType.DStringLiteral;
			goto case 'c';
		case 'c':
			if (includeQuotes)
				app.put(input.front);
			input.popFront();
			++index;
			break;
		default:
			break;
		}
	}
	t.value = to!string(app.data);
	return t;
}

// Exercises lexTokenString in the default style and with StringStyle.Source.
// Token types are compared through the .type field so that the assertions do
// not depend on Token's opEquals(TokenType) overload.
unittest
{
	uint i;
	uint l;
	auto a = "q{import std.stdio;} abcd";
	auto ar = lexTokenString(a, i, l);
	assert (ar.type == TokenType.StringLiteral);
	assert (ar == "import std.stdio;");

	auto b = `q{writeln("hello world");}`;
	auto br = lexTokenString(b, i, l, StringStyle.Source);
	assert (br.type == TokenType.StringLiteral);
	assert (br == `q{writeln("hello world");}`);
}

/**
 * Lexes a numeric literal, dispatching on its prefix: 0x/0X goes to lexHex,
 * 0b/0B goes to lexBinary, and everything else goes to lexDecimal.
 *
 * Params:
 *     input = the source range, positioned on the first digit; advanced past
 *         the literal
 *     index = running count of elements consumed; incremented for every
 *         element popped
 *     lineNumber = line recorded in the returned token
 * Returns: the token produced by the selected lexer, with startIndex set to
 *     the position of the literal's first character
 */
Token lexNumber(R)(ref R input, ref uint index, const uint lineNumber)
in
{
	assert(isDigit(input.front));
}
body
{
	auto app = appender!(ElementType!(R)[])();
	// hex and binary can start with zero, anything else is decimal
	if (input.front != '0')
		return lexDecimal(input, index, lineNumber, app);

	// The callee records its start index only after the prefix has been
	// consumed here, so remember where the literal really begins.
	immutable uint start = index;
	app.put(input.front);
	input.popFront();
	++index;

	Token token;
	if (input.empty)
	{
		// A lone "0" at the end of the input
		token = lexDecimal(input, index, lineNumber, app);
	}
	else
	{
		switch (input.front)
		{
		case 'x':
		case 'X':
			app.put(input.front);
			input.popFront();
			++index;
			token = lexHex(input, index, lineNumber, app);
			break;
		case 'b':
		case 'B':
			app.put(input.front);
			input.popFront();
			++index;
			token = lexBinary(input, index, lineNumber, app);
			break;
		default:
			token = lexDecimal(input, index, lineNumber, app);
			break;
		}
	}
	token.startIndex = start;
	return token;
}

// A zero followed by a character that is neither a hex nor a binary prefix is
// lexed as the single-digit literal "0".
unittest
{
	uint index;
	uint line;
	auto source = "0q1239";
	auto token = lexNumber(source, index, line);
	assert (token == "0");
}

/**
 * Lexes the digits and integer suffix of a binary literal. The 0b/0B prefix
 * must already have been consumed and stored in app by the caller.
 *
 * Params:
 *     input = the source range, positioned just after the prefix
 *     index = running count of elements consumed; incremented per element
 *     lineNumber = line recorded in the returned token
 *     app = appender that already holds the prefix; digits and suffix
 *         characters are added to it
 * Returns: a token of type IntLiteral, UnsignedIntLiteral, LongLiteral or
 *     UnsignedLongLiteral whose value is the contents of app
 */
Token lexBinary(R, A)(ref R input, ref uint index, const uint lineNumber,
	ref A app)
{
	Token token;
	token.lineNumber = lineNumber;
	token.startIndex = index;
	token.type = TokenType.IntLiteral;
	bool lexingSuffix = false;
	bool isLong = false;
	bool isUnsigned = false;
	binaryLoop: while (!input.isEoF())
	{
		switch (input.front)
		{
		case '0':
		case '1':
		case '_':
			// Digits may not follow a suffix
			if (lexingSuffix)
				break binaryLoop;
			app.put(input.front);
			input.popFront();
			++index;
			break;
		case 'u':
		case 'U':
			if (isUnsigned)
				break binaryLoop;
			app.put(input.front);
			input.popFront();
			++index;
			// As with 'L', once a suffix has started no more digits belong
			// to this literal.
			lexingSuffix = true;
			if (isLong)
			{
				token.type = TokenType.UnsignedLongLiteral;
				break binaryLoop;
			}
			else
				token.type = TokenType.UnsignedIntLiteral;
			isUnsigned = true;
			break;
		case 'L':
			if (isLong)
				break binaryLoop;
			app.put(input.front);
			input.popFront();
			++index;
			lexingSuffix = true;
			if (isUnsigned)
			{
				token.type = TokenType.UnsignedLongLiteral;
				break binaryLoop;
			}
			else
				token.type = TokenType.LongLiteral;
			isLong = true;
			break;
		default:
			break binaryLoop;
		}
	}
	token.value = to!string(app.data);
	return token;
}

// Exercises binary literals through lexNumber: plain digits, underscores and
// the u/U/L suffix combinations. The index and line counters are shared.
unittest
{
	uint index;
	uint line;

	// Plain digits: the whole input is consumed.
	{
		auto source = "0b000101";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b000101");
		assert (source == "");
	}

	// Nothing may follow the L suffix.
	{
		auto source = "0b001L_";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b001L");
		assert (token.type == TokenType.LongLiteral);
	}

	// u followed by L is complete; the second L is left in the input.
	{
		auto source = "0b1101uLL";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b1101uL");
		assert (token.type == TokenType.UnsignedLongLiteral);
	}

	// An unrelated character ends the literal.
	{
		auto source = "0b1q";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b1");
		assert (token.type == TokenType.IntLiteral);
	}

	// Underscores between digits, L before U.
	{
		auto source = "0b1_0_1LU";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b1_0_1LU");
		assert (token.type == TokenType.UnsignedLongLiteral);
	}

	// A repeated unsigned suffix is not consumed.
	{
		auto source = "0b1_0_1uU";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b1_0_1u");
		assert (token.type == TokenType.UnsignedIntLiteral);
	}

	// A repeated long suffix is not consumed.
	{
		auto source = "0b1_0_1LL";
		auto token = lexNumber(source, index, line);
		assert (token.value == "0b1_0_1L");
		assert (token.type == TokenType.LongLiteral);
	}
}


/**
 * Lexes a decimal integer or floating point literal, including an optional
 * fraction, exponent and suffix.
 *
 * An exponent sign is only accepted directly after 'e'/'E' and only when a
 * digit follows it. A '+' or '-' anywhere else ends the literal, so that
 * "1e5-3" yields the literal "1e5". A '.' or an exponent is not accepted once
 * a suffix has been seen.
 *
 * Params:
 *     input = the source range; advanced past the literal
 *     index = running count of elements consumed; incremented per element
 *     lineNumber = line recorded in the returned token
 *     app = appender receiving the literal's characters; it may already hold
 *         a leading "0" consumed by the caller
 * Returns: a token whose value is the contents of app and whose type reflects
 *     the fraction, exponent and suffix that were found
 */
Token lexDecimal(R, A)(ref R input, ref uint index, const uint lineNumber,
	ref A app)
{
	bool lexingSuffix = false;
	bool isLong = false;
	bool isUnsigned = false;
	bool isFloat = false;
	bool isReal = false;
	bool isDouble = false;
	bool foundDot = false;
	bool foundE = false;
	Token token;
	token.type = TokenType.IntLiteral;
	token.startIndex = index;
	token.lineNumber = lineNumber;
	decimalLoop: while (!input.isEoF())
	{
		switch (input.front)
		{
		case '0': .. case '9':
		case '_':
			if (lexingSuffix)
				break decimalLoop;
			app.put(input.front);
			input.popFront();
			++index;
			break;
		case 'e':
		case 'E':
			if (foundE || lexingSuffix)
				break decimalLoop;
			// For this to be a valid exponent, the next character must be a
			// decimal character or a sign followed by a decimal character
			auto r = input.save();
			r.popFront();
			if (r.isEoF())
				break decimalLoop;
			bool hasSign = false;
			switch (r.front)
			{
			case '+':
			case '-':
				r.popFront();
				if (r.isEoF() || r.front < '0' || r.front > '9')
				{
					break decimalLoop;
				}
				hasSign = true;
				break;
			case '0': .. case '9':
				break;
			default:
				break decimalLoop;
			}
			app.put(input.front); // e or E
			input.popFront();
			++index;
			if (hasSign)
			{
				// The sign was validated by the look-ahead above
				app.put(input.front);
				input.popFront();
				++index;
			}
			foundE = true;
			isDouble = true;
			token.type = TokenType.DoubleLiteral;
			break;
		case '.':
			if (lexingSuffix || foundE)
				break decimalLoop; // no fraction after a suffix or exponent
			if (foundDot)
				break decimalLoop; // two dots with other characters between them
			auto r = input.save();
			r.popFront();
			if (!r.isEoF() && r.front == '.')
				break decimalLoop; // possibly slice expression
			app.put(input.front);
			input.popFront();
			++index;
			foundDot = true;
			token.type = TokenType.DoubleLiteral;
			isDouble = true;
			break;
		case 'u':
		case 'U':
			if (isUnsigned)
				break decimalLoop;
			app.put(input.front);
			input.popFront();
			++index;
			lexingSuffix = true;
			if (isLong)
				token.type = TokenType.UnsignedLongLiteral;
			else
				token.type = TokenType.UnsignedIntLiteral;
			isUnsigned = true;
			break;
		case 'L':
			if (isLong || isReal)
				break decimalLoop;
			app.put(input.front);
			input.popFront();
			++index;
			lexingSuffix = true;
			if (isDouble)
			{
				token.type = TokenType.RealLiteral;
				isReal = true;
			}
			else if (isUnsigned)
			{
				token.type = TokenType.UnsignedLongLiteral;
				isLong = true;
			}
			else
			{
				token.type = TokenType.LongLiteral;
				isLong = true;
			}
			break;
		case 'f':
		case 'F':
			// A float suffix cannot follow an integer suffix, a real suffix
			// or another float suffix
			if (isUnsigned || isLong || isFloat || isReal)
				break decimalLoop;
			lexingSuffix = true;
			app.put(input.front);
			input.popFront();
			++index;
			token.type = TokenType.FloatLiteral;
			isFloat = true;
			break;
		case 'i':
			// Spec says that this is the last suffix, so all cases break the
			// loop.
			if (isReal)
			{
				app.put(input.front);
				input.popFront();
				++index;
				token.type = TokenType.IRealLiteral;
			}
			else if (isFloat)
			{
				app.put(input.front);
				input.popFront();
				++index;
				token.type = TokenType.IFloatLiteral;
			}
			else if (isDouble)
			{
				app.put(input.front);
				input.popFront();
				++index;
				token.type = TokenType.IDoubleLiteral;
			}
			break decimalLoop;
		default:
			break decimalLoop;
		}
	}
	token.value = to!string(app.data());
	return token;
}


// Exercises decimal literals through lexNumber: exponents, fractions, slice
// look-ahead and suffix combinations. Token types are compared through the
// .type field so that the assertions do not depend on Token's
// opEquals(TokenType) overload.
unittest
{
	uint i;
	uint l;
	auto a = "55e-4";
	auto ar = lexNumber(a, i, l);
	assert (ar.value == "55e-4");
	assert (ar.type == TokenType.DoubleLiteral);

	auto b = "123.45f";
	auto br = lexNumber(b, i, l);
	assert (br.value == "123.45f");
	assert (br.type == TokenType.FloatLiteral);

	auto c = "3e+f";
	auto cr = lexNumber(c, i, l);
	assert (cr.value == "3");
	assert (cr.type == TokenType.IntLiteral);

	auto d = "3e++f";
	auto dr = lexNumber(d, i, l);
	assert (dr.value == "3");
	assert (dr.type == TokenType.IntLiteral);

	auto e = "1234..1237";
	auto er = lexNumber(e, i, l);
	assert (er.value == "1234");
	assert (er.type == TokenType.IntLiteral);

	auto f = "12L_";
	auto fr = lexNumber(f, i, l);
	assert (fr == "12L");

	auto g = "12e-12e";
	auto gr = lexNumber(g, i, l);
	assert (gr == "12e-12");

	auto h = "12e10";
	auto hr = lexNumber(h, i, l);
	assert (hr == "12e10");

	auto j = "12er";
	auto jr = lexNumber(j, i, l);
	assert (jr == "12");

	auto k = "12e+12-";
	auto kr = lexNumber(k, i, l);
	assert (kr == "12e+12");

	auto m = "1.1.";
	auto mr = lexNumber(m, i, l);
	assert (mr == "1.1");

	auto n = "12uu";
	auto nr = lexNumber(n, i, l);
	assert (nr == "12u");
	assert (nr.type == TokenType.UnsignedIntLiteral);

	auto o = "12LU";
	auto or = lexNumber(o, i, l);
	assert (or == "12LU");

	auto p = "3LL";
	auto pr = lexNumber(p, i, l);
	assert (pr == "3L");

	auto q = "3.0LL";
	auto qr = lexNumber(q, i, l);
	assert (qr == "3.0L");

	auto r = "5uL";
	auto rr = lexNumber(r, i, l);
	assert (rr == "5uL");

	auto s = "5Lf";
	auto sr = lexNumber(s, i, l);
	assert (sr == "5L");
	assert (sr.type == TokenType.LongLiteral);

	auto t = "5i";
	auto tr = lexNumber(t, i, l);
	assert (tr == "5");
	assert (tr.type == TokenType.IntLiteral);

	auto u = "894.3i";
	auto ur = lexNumber(u, i, l);
	assert (ur == "894.3i");
	assert (ur.type == TokenType.IDoubleLiteral);

	auto v = "894.3Li";
	auto vr = lexNumber(v, i, l);
	assert (vr == "894.3Li");
	assert (vr.type == TokenType.IRealLiteral);

	auto w = "894.3fi";
	auto wr = lexNumber(w, i, l);
	assert (wr == "894.3fi");
	assert (wr.type == TokenType.IFloatLiteral);

	auto x = "4892.4ee";
	auto xr = lexNumber(x, i, l);
	assert (xr == "4892.4");
	assert (xr.type == TokenType.DoubleLiteral);
}

/**
 * Lexes the digits, optional fraction and optional binary exponent of a
 * hexadecimal literal. The 0x/0X prefix must already have been consumed and
 * stored in app by the caller.
 *
 * An exponent sign is only accepted directly after 'p'/'P' and only when a
 * digit follows it. A '+' or '-' anywhere else ends the literal. No suffix
 * characters are recognized here: the first character that is not a hex
 * digit, underscore, '.', or a valid exponent ends the literal.
 *
 * Params:
 *     input = the source range, positioned just after the prefix
 *     index = running count of elements consumed; incremented per element
 *     lineNumber = line recorded in the returned token
 *     app = appender that already holds the prefix
 * Returns: a token of type IntLiteral, or DoubleLiteral if a fraction or
 *     exponent was found, whose value is the contents of app
 */
Token lexHex(R, A)(ref R input, ref uint index, const uint lineNumber,
	ref A app)
{
	bool foundDot = false;
	bool foundExp = false;
	Token token;
	token.lineNumber = lineNumber;
	token.startIndex =  index;
	token.type = TokenType.IntLiteral;
	hexLoop: while (!input.isEoF())
	{
		switch (input.front)
		{
		case 'a': .. case 'f':
		case 'A': .. case 'F':
			// The exponent is written in decimal digits only
			if (foundExp)
				break hexLoop;
			else
				goto case;
		case '0': .. case '9':
		case '_':
			app.put(input.front);
			input.popFront();
			++index;
			break;
		case 'p':
		case 'P':
			if (foundExp)
				break hexLoop;
			// Look ahead: the exponent needs a digit, or a sign and a digit
			auto r = input.save();
			r.popFront();
			if (r.isEoF())
				break hexLoop;
			bool hasSign = false;
			switch (r.front)
			{
			case '-':
			case '+':
				r.popFront();
				if (r.isEoF() || !isDigit(r.front))
					break hexLoop;
				hasSign = true;
				break;
			case '0': .. case '9':
				break;
			default:
				break hexLoop;
			}
			app.put(input.front); // p or P
			input.popFront();
			++index;
			if (hasSign)
			{
				// The sign was validated by the look-ahead above
				app.put(input.front);
				input.popFront();
				++index;
			}
			foundExp = true;
			token.type = TokenType.DoubleLiteral;
			break;
		case '.':
			if (foundExp)
				break hexLoop; // no fraction after the exponent
			if (foundDot)
				break hexLoop; // two dots with other characters between them
			auto r = input.save();
			r.popFront();
			if (!r.isEoF() && r.front == '.')
				break hexLoop; // slice expression
			app.put(input.front);
			input.popFront();
			++index;
			foundDot = true;
			token.type = TokenType.DoubleLiteral;
			break;
		default:
			break hexLoop;
		}
	}
	token.value = to!string(app.data);
	return token;
}

// Exercises hexadecimal literals through lexNumber: digits, fractions, binary
// exponents and slice look-ahead. Token types are compared through the .type
// field so that the assertions do not depend on Token's opEquals(TokenType)
// overload.
unittest
{
	uint i;
	uint l;

	auto a = "0x193abfq";
	auto ar = lexNumber(a, i, l);
	assert(ar.value == "0x193abf");
	assert(ar.type == TokenType.IntLiteral);

	auto b = "0x2130xabc";
	auto br = lexNumber(b, i, l);
	assert(br.value == "0x2130");
	assert(br.type == TokenType.IntLiteral);

	auto c = "0x123..0321";
	auto cr = lexNumber(c, i, l);
	assert (cr.value == "0x123");
	assert (cr.type == TokenType.IntLiteral);

	auto d = "0xabp5";
	auto dr = lexNumber(d, i, l);
	assert (dr == "0xabp5");
	assert (dr.type == TokenType.DoubleLiteral);

	auto e = "0x93p+5";
	auto er = lexNumber(e, i, l);
	assert (er == "0x93p+5");
	assert (er.type == TokenType.DoubleLiteral);

	auto f = "0x93pp";
	auto fr = lexNumber(f, i, l);
	assert (fr == "0x93");
	assert (fr.type == TokenType.IntLiteral);

	auto g = "0XF..7";
	auto gr = lexNumber(g, i, l);
	assert (gr == "0XF");
	assert (gr.type == TokenType.IntLiteral);

	auto h = "0x8.4p100";
	auto hr = lexNumber(h, i, l);
	assert (hr == "0x8.4p100");
	assert (hr.type == TokenType.DoubleLiteral);

	auto j = "0x8.4.100";
	auto jr = lexNumber(j, i, l);
	assert (jr == "0x8.4");
	assert (jr.type == TokenType.DoubleLiteral);

	auto k = "0x1p-t";
	auto kr = lexNumber(k, i, l);
	assert (kr == "0x1");
	assert (kr.type == TokenType.IntLiteral);

	auto m = "0x1p-5p";
	auto mr = lexNumber(m, i, l);
	assert (mr == "0x1p-5");
	assert (mr.type == TokenType.DoubleLiteral);

	auto n = "0x1p-c_";
	auto nr = lexNumber(n, i, l);
	assert (nr == "0x1");
	assert (nr.type == TokenType.IntLiteral);

	auto o = "0x1p-1a";
	auto or = lexNumber(o, i, l);
	assert (or == "0x1p-1");
	assert (or.type == TokenType.DoubleLiteral);

	auto p = "0x1p-1+";
	auto pr = lexNumber(p, i, l);
	assert (pr == "0x1p-1");
	assert (pr.type == TokenType.DoubleLiteral);
}

/**
 * Attempts to lex a $(D #line) special token sequence: $(D #line), an integer
 * literal, an optional double-quoted file specification, and the end of the
 * line.
 *
 * All work is done on a saved copy of the input. The arguments are only
 * modified when the sequence is recognized.
 *
 * Params:
 *     input = the source range, positioned on the '#'; advanced past the
 *         sequence on success, untouched on failure
 *     index = running count of elements consumed; updated on success
 *     lineNumber = set to the line number given in the directive on success
 * Returns: the text of the sequence, or null if the input is not a
 *     well-formed $(D #line) sequence
 */
string lexSpecialTokenSequence(R)(ref R input, ref uint index,
    ref uint lineNumber)
in
{
    assert (input.front == '#');
}
body
{
    auto i = index;
    auto r = input.save;
    auto l = lineNumber;
    r.popFront();
    ++i;
    auto app = appender!(ElementType!(R)[])();
    app.put('#');

    auto specialType = appender!(ElementType!(R)[])();

    while (!r.empty && !isSeparating(r.front))
    {
        specialType.put(r.front);
        ++i;
        r.popFront();
    }

    if (to!string(specialType.data) != "line")
        return null;
    app.put(specialType.data);

    // "#line" with nothing after it is not a complete sequence
    if (r.empty)
        return null;
    if (std.uni.isWhite(r.front))
        app.put(lexWhitespace(r, i, l).value);

    if (r.empty || !isDigit(r.front))
        return null;

    auto t = lexNumber(r, i, l);
    // Compare the type field directly rather than through Token.opEquals
    if (t.type != TokenType.IntLiteral)
        return null;

    app.put(t.value);
    try
        l = to!uint(t.value);
    catch (ConvException e)
        return null; // e.g. digit separators or a value too large for uint

    if (!r.empty && !isNewline(r))
    {
        if (!r.empty && std.uni.isWhite(r.front))
            app.put(lexWhitespace(r, i, l).value);

        if (!r.empty && r.front == '"')
        {
            auto fSpecApp = appender!(ElementType!(R)[])();
            fSpecApp.put(r.front);
            r.popFront();
            ++i;
            while (!r.empty)
            {
                if (r.front == '"')
                {
                    fSpecApp.put('"');
                    ++i;
                    r.popFront();
                    break;
                }
                ++i;
                fSpecApp.put(r.front);
                r.popFront();
            }
            app.put(fSpecApp.data);
        }
        else
            return null;
    }

    // The sequence may also be terminated by the end of the input
    if (!r.empty && isNewline(r))
        app.put(popNewline(r, i));
    input.popFrontN(i - index);
    index = i;
    lineNumber = l;
    return to!string(app.data);
}

// Exercises lexSpecialTokenSequence. The index and line counters are shared,
// so each case sees the line number left behind by the previous one.
unittest
{
    uint index;
    uint line;

    // Line number only: the whole input is consumed and the line is updated.
    {
        auto source = "#line 10\n";
        auto text = lexSpecialTokenSequence(source, index, line);
        assert (text == "#line 10\n");
        assert (source == "");
        assert (line == 10);
    }

    // Line number followed by a file specification.
    {
        auto source = "#line 9201 \"test.d\"\n";
        auto text = lexSpecialTokenSequence(source, index, line);
        assert (line == 9201);
        assert (text == "#line 9201 \"test.d\"\n");
        assert (source == "");
    }

    // Anything other than "#line" is rejected and leaves the arguments alone.
    {
        auto source = `#lin`;
        auto text = lexSpecialTokenSequence(source, index, line);
        assert (line == 9201);
        assert (text is null);
        assert (source == `#lin`);
    }
}

/**
 * Returns: true if ch is an ASCII punctuation character other than the
 *     underscore, or one of space, tab, newline, vertical tab, form feed or
 *     carriage return; false for every other character
 */
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
{
	// ASCII punctuation occupies four contiguous runs plus the backtick; the
	// underscore (between '^' and '`') is deliberately not included.
	immutable bool punctuation =
		(ch >= '!' && ch <= '/')
		|| (ch >= ':' && ch <= '@')
		|| (ch >= '[' && ch <= '^')
		|| (ch >= '{' && ch <= '~')
		|| ch == '`';
	// Tab (0x09) through carriage return (0x0d), and the space character
	immutable bool whitespace = (ch >= 0x09 && ch <= 0x0d) || ch == 0x20;
	return punctuation || whitespace;
}

/**
 * Maps an identifier-like string to its keyword token type.
 *
 * Candidates are grouped by length so that the input is only compared against
 * keywords of the same length.
 *
 * Params:
 *     input = the text to classify
 * Returns: the TokenType of the keyword equal to input, or
 *     TokenType.Identifier if input is not one of the listed keywords
 */
pure nothrow TokenType lookupTokenType(const string input)
{
	switch(input.length)
	{
	case 2:
		switch (input)
		{
		case "do": return TokenType.Do;
		case "if": return TokenType.If;
		case "in": return TokenType.In;
		case "is": return TokenType.Is;
		default: break;
		}
		break;
	case 3:
		switch (input)
		{
		case "asm": return TokenType.Asm;
		case "for": return TokenType.For;
		case "int": return TokenType.Int;
		case "new": return TokenType.New;
		case "out": return TokenType.Out;
		case "ref": return TokenType.Ref;
		case "try": return TokenType.Try;
		default: break;
		}
		break;
	case 4:
		switch (input)
		{
		case "auto": return TokenType.Auto;
		case "body": return TokenType.Body;
		case "bool": return TokenType.Bool;
		case "byte": return TokenType.Byte;
		case "case": return TokenType.Case;
		case "cast": return TokenType.Cast;
		case "cent": return TokenType.Cent;
		case "char": return TokenType.Char;
		case "else": return TokenType.Else;
		case "enum": return TokenType.Enum;
		case "goto": return TokenType.Goto;
		case "lazy": return TokenType.Lazy;
		case "long": return TokenType.Long;
		case "null": return TokenType.Null;
		case "pure": return TokenType.Pure;
		case "real": return TokenType.Real;
		case "this": return TokenType.This;
		case "true": return TokenType.True;
		case "uint": return TokenType.Uint;
		case "void": return TokenType.Void;
		case "with": return TokenType.With;
		default: break;
		}
		break;
	case 5:
		switch (input)
		{
		case "alias": return TokenType.Alias;
		case "align": return TokenType.Align;
		case "break": return TokenType.Break;
		case "catch": return TokenType.Catch;
		case "class": return TokenType.Class;
		case "const": return TokenType.Const;
		case "creal": return TokenType.Creal;
		case "dchar": return TokenType.Dchar;
		case "debug": return TokenType.Debug;
		case "false": return TokenType.False;
		case "final": return TokenType.Final;
		case "float": return TokenType.Float;
		case "inout": return TokenType.Inout;
		case "ireal": return TokenType.Ireal;
		case "macro": return TokenType.Macro;
		case "mixin": return TokenType.Mixin;
		case "scope": return TokenType.Scope;
		case "short": return TokenType.Short;
		case "super": return TokenType.Super;
		case "throw": return TokenType.Throw;
		case "ubyte": return TokenType.Ubyte;
		case "ucent": return TokenType.Ucent;
		case "ulong": return TokenType.Ulong;
		case "union": return TokenType.Union;
		case "wchar": return TokenType.Wchar;
		case "while": return TokenType.While;
		default: break;
		}
		break;
	case 6:
		switch (input)
		{
		case "assert": return TokenType.Assert;
		case "cfloat": return TokenType.Cfloat;
		case "delete": return TokenType.Delete;
		case "double": return TokenType.Double;
		case "export": return TokenType.Export;
		case "extern": return TokenType.Extern;
		case "ifloat": return TokenType.Ifloat;
		case "import": return TokenType.Import;
		case "module": return TokenType.Module;
		case "pragma": return TokenType.Pragma;
		case "public": return TokenType.Public;
		case "return": return TokenType.Return;
		case "shared": return TokenType.Shared;
		case "static": return TokenType.Static;
		case "string": return TokenType.String;
		case "struct": return TokenType.Struct;
		case "switch": return TokenType.Switch;
		case "typeid": return TokenType.Typeid;
		case "typeof": return TokenType.Typeof;
		case "ushort": return TokenType.Ushort;
		default: break;
		}
		break;
	case 7:
		switch (input)
		{
		case "cdouble": return TokenType.Cdouble;
		case "default": return TokenType.Default;
		case "dstring": return TokenType.DString;
		case "finally": return TokenType.Finally;
		case "foreach": return TokenType.Foreach;
		case "idouble": return TokenType.Idouble;
		case "nothrow": return TokenType.Nothrow;
		case "package": return TokenType.Package;
		case "private": return TokenType.Private;
		case "typedef": return TokenType.Typedef;
		case "version": return TokenType.Version;
		case "wstring": return TokenType.WString;
		default: break;
		}
		break;
	case 8:
		switch (input)
		{
		case "override": return TokenType.Override;
		case "continue": return TokenType.Continue;
		case "__LINE__": return TokenType.Line;
		case "template": return TokenType.Template;
		case "abstract": return TokenType.Abstract;
		case "__thread": return TokenType.Thread;
		case "__traits": return TokenType.Traits;
		case "volatile": return TokenType.Volatile;
		case "delegate": return TokenType.Delegate;
		case "function": return TokenType.Function;
		case "unittest": return TokenType.Unittest;
		case "__FILE__": return TokenType.File;
		default: break;
		}
		break;
	case 9:
		switch (input)
		{
		case "__gshared": return TokenType.Gshared;
		case "immutable": return TokenType.Immutable;
		case "interface": return TokenType.Interface;
		case "invariant": return TokenType.Invariant;
		case "protected": return TokenType.Protected;
		default: break;
		}
		break;
	case 10:
		if (input == "deprecated")
			return TokenType.Deprecated;
		break;
	case 12: // "synchronized" is twelve characters long
		if (input == "synchronized")
			return TokenType.Synchronized;
		break;
	case 15: // "foreach_reverse" is fifteen characters long
		if (input == "foreach_reverse")
			return TokenType.Foreach_reverse;
		break;
	default: break;
	}
	return TokenType.Identifier;
}

/**
 * Root of a trie whose keys are ranges of type K and whose values are of
 * type V. The root is itself a TrieNode.
 */
class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
{
	/**
	 * Stores value under key, creating one node per key element along the
	 * way where none exists yet. A value already stored under the same key
	 * is replaced.
	 */
	void add(K key, V value) pure
	{
		TrieNode!(K,V) cursor = this;
		foreach (part; key)
		{
			// Create the missing child first, then descend unconditionally
			if ((part in cursor.children) is null)
				cursor.children[part] = new TrieNode!(K, V);
			cursor = cursor.children[part];
		}
		cursor.value = value;
	}
}

/**
 * A single node of a trie keyed by ranges of type K.
 */
class TrieNode(K, V) if (isInputRange!K)
{
	/// The value stored at this node
	V value;

	/// Child nodes, indexed by the next element of the key
	TrieNode[ElementType!K] children;
}

/**
 * Generates the text of D case statements for every child of node, recursing
 * into nested switch statements for children that have children of their own.
 *
 * The generated code refers to the names current, index and range; it appends
 * the matched character to current.value, advances index and range, and
 * assigns the node's value to current.type.
 *
 * Params:
 *     node = the trie node whose children are turned into case statements
 *     indentString = indentation placed in front of every generated line
 * Returns: the generated source text; empty if node has no children
 */
string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
{
	immutable string inner = indentString ~ "\t";
	immutable string innermost = inner ~ "\t";
	string code = "";
	foreach(dchar key, TrieNode!(K,V) child; node.children)
	{
		// Case label, then the bookkeeping every branch performs
		code ~= indentString ~ "case '";
		code ~= key;
		code ~= "':\n";
		code ~= inner ~ "current.value ~= '";
		code ~= key;
		code ~= "';\n";
		code ~= inner ~ "++index;\n";
		code ~= inner ~ "range.popFront();\n";

		if (child.children.length == 0)
		{
			// Leaf: no longer match is possible, so the type is final
			code ~= inner ~ "current.type = ";
			code ~= child.value;
			code ~= ";\n";
			code ~= inner ~ "break;\n";
			continue;
		}

		// End of input after this character: settle for this node's type
		code ~= inner ~ "if (range.isEoF())\n";
		code ~= inner ~ "{\n";
		code ~= innermost ~ "current.type = ";
		code ~= child.value;
		code ~= ";\n";
		code ~= innermost ~ "break;\n";
		code ~= inner ~ "}\n";

		// Otherwise try to extend the match by one more character
		code ~= inner ~ "switch (range.front)\n";
		code ~= inner ~ "{\n";
		code ~= printCaseStatements(child, inner);
		code ~= inner ~ "default:\n";
		code ~= innermost ~ "current.type = ";
		code ~= child.value;
		code ~= ";\n";
		code ~= innermost ~ "break;\n";
		code ~= inner ~ "}\n";
		code ~= inner ~ "break;\n";
	}
	return code;
}

/**
 * Builds a trie from a flat list of (key, value) pairs and returns the case
 * statements generated for it by printCaseStatements.
 *
 * Params:
 *     args = alternating keys and values: args[0] is the first key, args[1]
 *         its value, args[2] the second key, and so on
 * Returns: the generated source text, with no leading indentation
 */
string generateCaseTrie(string[] args ...)
{
	auto trie = new Trie!(string, string);
	// Consume the arguments two at a time
	for (auto rest = args; rest.length > 0; rest = rest[2 .. $])
		trie.add(rest[0], rest[1]);
	return printCaseStatements(trie, "");
}

// Empty program entry point. NOTE(review): presumably present so that this
// module can be built and run on its own (for example to execute its unittest
// blocks) -- confirm before removing.
void main() {}