Updated lexer docs. Implemented delete and fp operator rules. Fixed bug with AST traversal

This commit is contained in:
Hackerpilot 2014-01-26 22:47:21 -08:00
parent 2f78272fed
commit d13d680b74
9 changed files with 995 additions and 85 deletions

29
analysis/del.d Normal file
View File

@ -0,0 +1,29 @@
// Copyright Brian Schott (Sir Alaran) 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module analysis.del;
import stdx.d.ast;
import stdx.d.lexer;
import analysis.base;
/**
* Checks for use of the deprecated "delete" keyword
*/
class DeleteCheck : BaseAnalyzer
{
// Bring the base class's other visit overloads into scope so this
// override does not hide them.
alias visit = BaseAnalyzer.visit;
this(string fileName)
{
super(fileName);
}
// Reports each use of a `delete` expression at the expression's own
// line/column, then descends into its children so nested deletes in
// the operand are also caught.
override void visit(DeleteExpression d)
{
addErrorMessage(d.line, d.column, "Avoid using the deprecated delete keyword");
d.accept(this);
}
}

38
analysis/fish.d Normal file
View File

@ -0,0 +1,38 @@
// Copyright Brian Schott (Sir Alaran) 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module analysis.fish;
import stdx.d.ast;
import stdx.d.lexer;
import analysis.base;
/**
* Checks for use of the deprecated floating point comparison operators.
*/
class FloatOperatorCheck : BaseAnalyzer
{
// Bring the base class's other visit overloads into scope so this
// override does not hide them.
alias visit = BaseAnalyzer.visit;
this(string fileName)
{
super(fileName);
}
// Flags the deprecated NCEG floating-point comparison operators.
// Fix: "<>=" is also one of the eight deprecated operators (and the
// parser's RelExpression rule accepts tok!"<>="), but it was missing
// from the original list, so uses of it went unreported.
override void visit(RelExpression r)
{
if (r.operator == tok!"<>"
|| r.operator == tok!"<>="
|| r.operator == tok!"!<>"
|| r.operator == tok!"!<>="
|| r.operator == tok!"!>"
|| r.operator == tok!"!>="
|| r.operator == tok!"!<"
|| r.operator == tok!"!<=")
{
addErrorMessage(r.line, r.column, "Avoid using the deprecated floating-point operators");
}
r.accept(this);
}
}

View File

@ -15,6 +15,8 @@ import analysis.base;
import analysis.style;
import analysis.enumarrayliteral;
import analysis.pokemon;
import analysis.del;
import analysis.fish;
void messageFunction(string fileName, size_t line, size_t column, string message,
bool isError)
@ -63,8 +65,14 @@ void analyze(File output, string[] fileNames, bool staticAnalyze = true)
auto pokemon = new PokemonExceptionCheck(fileName);
pokemon.visit(m);
auto del = new DeleteCheck(fileName);
del.visit(m);
auto fish = new FloatOperatorCheck(fileName);
fish.visit(m);
foreach (message; sort(chain(enums.messages, style.messages,
pokemon.messages).array))
pokemon.messages, del.messages, fish.messages).array))
{
writeln(message);
}

5
main.d
View File

@ -109,10 +109,11 @@ int main(string[] args)
}
else if (tokenDump)
{
writeln("text blank\tindex\tline\tcolumn\tcomment");
foreach (token; tokens)
{
writeln(", token.text is null ? str(token.type) : token.text,
"» ", token.text !is null, " ", token.index, " ", token.line, " ", token.column, " ",
writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
token.text !is null, token.index, token.line, token.column,
token.comment);
}
return 0;

View File

@ -31,6 +31,58 @@ import std.string;
abstract class ASTVisitor
{
public:
/**
 * Dispatches a node known only as an ExpressionNode to the visit
 * overload for its concrete runtime type, via a chain of casts.
 * NOTE(review): every ExpressionNode subclass must appear in this
 * chain; a type missing here is silently skipped and traversal stops
 * at that node — verify the list stays in sync when expression types
 * are added.
 */
void visit(ExpressionNode n)
{
if (cast(AddExpression) n) visit(cast(AddExpression) n);
else if (cast(AndAndExpression) n) visit(cast(AndAndExpression) n);
else if (cast(AndExpression) n) visit(cast(AndExpression) n);
else if (cast(AsmAddExp) n) visit(cast(AsmAddExp) n);
else if (cast(AsmAndExp) n) visit(cast(AsmAndExp) n);
else if (cast(AsmEqualExp) n) visit(cast(AsmEqualExp) n);
else if (cast(AsmLogAndExp) n) visit(cast(AsmLogAndExp) n);
else if (cast(AsmLogOrExp) n) visit(cast(AsmLogOrExp) n);
else if (cast(AsmMulExp) n) visit(cast(AsmMulExp) n);
else if (cast(AsmOrExp) n) visit(cast(AsmOrExp) n);
else if (cast(AsmRelExp) n) visit(cast(AsmRelExp) n);
else if (cast(AsmShiftExp) n) visit(cast(AsmShiftExp) n);
else if (cast(AssertExpression) n) visit(cast(AssertExpression) n);
else if (cast(AssignExpression) n) visit(cast(AssignExpression) n);
else if (cast(CmpExpression) n) visit(cast(CmpExpression) n);
else if (cast(DeleteExpression) n) visit(cast(DeleteExpression) n);
else if (cast(EqualExpression) n) visit(cast(EqualExpression) n);
else if (cast(Expression) n) visit(cast(Expression) n);
else if (cast(FunctionCallExpression) n) visit(cast(FunctionCallExpression) n);
else if (cast(FunctionLiteralExpression) n) visit(cast(FunctionLiteralExpression) n);
else if (cast(IdentityExpression) n) visit(cast(IdentityExpression) n);
else if (cast(ImportExpression) n) visit(cast(ImportExpression) n);
else if (cast(IndexExpression) n) visit(cast(IndexExpression) n);
else if (cast(InExpression) n) visit(cast(InExpression) n);
else if (cast(IsExpression) n) visit(cast(IsExpression) n);
else if (cast(LambdaExpression) n) visit(cast(LambdaExpression) n);
else if (cast(MixinExpression) n) visit(cast(MixinExpression) n);
else if (cast(MulExpression) n) visit(cast(MulExpression) n);
else if (cast(NewAnonClassExpression) n) visit(cast(NewAnonClassExpression) n);
else if (cast(NewExpression) n) visit(cast(NewExpression) n);
else if (cast(OrExpression) n) visit(cast(OrExpression) n);
else if (cast(OrOrExpression) n) visit(cast(OrOrExpression) n);
else if (cast(PostIncDecExpression) n) visit(cast(PostIncDecExpression) n);
else if (cast(PowExpression) n) visit(cast(PowExpression) n);
else if (cast(PragmaExpression) n) visit(cast(PragmaExpression) n);
else if (cast(PreIncDecExpression) n) visit(cast(PreIncDecExpression) n);
else if (cast(PrimaryExpression) n) visit(cast(PrimaryExpression) n);
else if (cast(RelExpression) n) visit(cast(RelExpression) n);
else if (cast(ShiftExpression) n) visit(cast(ShiftExpression) n);
else if (cast(SliceExpression) n) visit(cast(SliceExpression) n);
else if (cast(TemplateMixinExpression) n) visit(cast(TemplateMixinExpression) n);
else if (cast(TernaryExpression) n) visit(cast(TernaryExpression) n);
else if (cast(TraitsExpression) n) visit(cast(TraitsExpression) n);
else if (cast(TypeidExpression) n) visit(cast(TypeidExpression) n);
else if (cast(TypeofExpression) n) visit(cast(TypeofExpression) n);
else if (cast(UnaryExpression) n) visit(cast(UnaryExpression) n);
else if (cast(XorExpression) n) visit(cast(XorExpression) n);
}
/** */ void visit(AddExpression addExpression) { addExpression.accept(this); }
/** */ void visit(AliasDeclaration aliasDeclaration) { aliasDeclaration.accept(this); }
/** */ void visit(AliasInitializer aliasInitializer) { aliasInitializer.accept(this); }
@ -104,7 +156,6 @@ public:
/** */ void visit(EponymousTemplateDeclaration eponymousTemplateDeclaration) { eponymousTemplateDeclaration.accept(this); }
/** */ void visit(EqualExpression equalExpression) { equalExpression.accept(this); }
/** */ void visit(Expression expression) { expression.accept(this); }
/** */ void visit(ExpressionNode expressionNode) { expressionNode.accept(this); }
/** */ void visit(ExpressionStatement expressionStatement) { expressionStatement.accept(this); }
/** */ void visit(FinalSwitchStatement finalSwitchStatement) { finalSwitchStatement.accept(this); }
/** */ void visit(Finally finally_) { finally_.accept(this); }
@ -234,10 +285,11 @@ public:
/// Base interface implemented by every node of the syntax tree.
interface ASTNode
{
public:
/** Double-dispatch entry point: implementations pass this node's children to the visitor. */ void accept(ASTVisitor visitor);
}
immutable string DEFAULT_ACCEPT = q{void accept(ASTVisitor visitor) {}};
immutable string DEFAULT_ACCEPT = q{override void accept(ASTVisitor visitor) {}};
template visitIfNotNull(fields ...)
{
@ -259,19 +311,28 @@ template visitIfNotNull(fields ...)
}
}
abstract class ExpressionNode : ASTNode {}
/**
 * Common base class of all expression nodes.
 * accept() has a deliberately failing body: ExpressionNode itself is
 * never visited directly — every concrete subclass overrides accept —
 * so reaching this implementation indicates a missing override.
 */
abstract class ExpressionNode : ASTNode
{
public:
override void accept(ASTVisitor visitor)
{
assert (false);
}
}
/**
 * Mixed into each binary expression class to supply its operand and
 * source-location fields.
 */
mixin template BinaryExpressionBody()
{
// Left-hand operand.
ExpressionNode left;
// Right-hand operand.
ExpressionNode right;
// Line of the operator token (set from current.line in the parser).
size_t line;
// Column of the operator token (set from current.column in the parser).
size_t column;
}
///
class AddExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -283,7 +344,7 @@ public:
class AliasDeclaration : ASTNode
{
public:
void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, name, initializers));
}
@ -332,7 +393,7 @@ public:
class AndAndExpression : ExpressionNode
{
public:
void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -343,7 +404,7 @@ public:
class AndExpression : ExpressionNode
{
public:
void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -566,7 +627,7 @@ public:
class AssertExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assertion, message));
}
@ -578,7 +639,7 @@ public:
class AssignExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(ternaryExpression, assignExpression));
}
@ -816,7 +877,7 @@ public:
class CmpExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(shiftExpression, equalExpression,
identityExpression, relExpression, inExpression));
@ -1031,11 +1092,13 @@ public:
class DeleteExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
/** */ UnaryExpression unaryExpression;
/** */ size_t line;
/** */ size_t column;
}
///
@ -1151,7 +1214,7 @@ public:
class EqualExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1163,7 +1226,7 @@ public:
class Expression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(items));
}
@ -1293,7 +1356,7 @@ public:
class FunctionCallExpression : ExpressionNode
{
public:
void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, arguments, templateArguments));
}
@ -1306,7 +1369,7 @@ public:
class FunctionCallStatement : ASTNode
{
public:
void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(functionCallExpression));
}
@ -1338,7 +1401,7 @@ public:
class FunctionLiteralExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, parameters, functionAttributes,
functionBody));
@ -1413,7 +1476,7 @@ public:
class IdentityExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1478,7 +1541,7 @@ public:
class ImportExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assignExpression));
}
@ -1489,7 +1552,7 @@ public:
class IndexExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, argumentList));
}
@ -1501,7 +1564,7 @@ public:
class InExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1575,7 +1638,7 @@ public:
class IsExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, identifier, typeSpecialization,
templateParameterList));
@ -1626,7 +1689,7 @@ public:
class LambdaExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, parameters, functionAttributes,
assignExpression));
@ -1689,7 +1752,7 @@ public:
class MixinExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assignExpression));
}
@ -1748,7 +1811,7 @@ public:
class MulExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1760,7 +1823,7 @@ public:
class NewAnonClassExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(allocatorArguments, constructorArguments,
baseClassList, structBody));
@ -1775,7 +1838,7 @@ public:
class NewExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(newAnonClassExpression, type, arguments,
assignExpression));
@ -1863,7 +1926,7 @@ public:
class OrExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1874,7 +1937,7 @@ public:
class OrOrExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1937,7 +2000,7 @@ public:
class PostIncDecExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
@ -1949,7 +2012,7 @@ public:
class PowExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -1971,7 +2034,7 @@ public:
class PragmaExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, argumentList));
}
@ -1983,7 +2046,7 @@ public:
class PreIncDecExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
@ -1995,7 +2058,7 @@ public:
class PrimaryExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(basicType, primary, typeofExpression,
typeidExpression, arrayLiteral, assocArrayLiteral, expression,
@ -2035,7 +2098,7 @@ public:
class RelExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -2096,7 +2159,7 @@ public:
class ShiftExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@ -2120,7 +2183,7 @@ public:
class SliceExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, lower, upper));
}
@ -2409,7 +2472,7 @@ public:
class TemplateMixinExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, templateArguments, mixinTemplateName));
}
@ -2534,7 +2597,7 @@ public:
class TernaryExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(orOrExpression, expression, ternaryExpression));
}
@ -2558,7 +2621,7 @@ public:
class TraitsExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, templateArgumentList));
}
@ -2647,7 +2710,7 @@ public:
class TypeidExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, expression));
}
@ -2659,7 +2722,7 @@ public:
class TypeofExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(expression, return_));
}
@ -2671,7 +2734,7 @@ public:
class UnaryExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
// TODO prefix, postfix, unary
mixin (visitIfNotNull!(primaryExpression, newExpression,
@ -2803,7 +2866,7 @@ public:
class XorExpression : ExpressionNode
{
public:
/+override+/ void accept(ASTVisitor visitor)
override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}

View File

@ -1874,6 +1874,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
{
mixin(traceEnterAndExit!(__FUNCTION__));
auto node = new DeleteExpression;
node.line = current.line;
node.column = current.column;
if (expect(tok!"delete") is null) return null;
node.unaryExpression = parseUnaryExpression();
return node;
@ -3990,6 +3992,7 @@ q{(int a, ...)
*/
PragmaDeclaration parsePragmaDeclaration()
{
mixin (traceEnterAndExit!(__FUNCTION__));
auto node = new PragmaDeclaration;
node.pragmaExpression = parsePragmaExpression();
expect(tok!";");
@ -4005,6 +4008,7 @@ q{(int a, ...)
*/
PragmaExpression parsePragmaExpression()
{
mixin (traceEnterAndExit!(__FUNCTION__));
auto node = new PragmaExpression;
expect(tok!"pragma");
expect(tok!"(");
@ -4264,8 +4268,9 @@ q{(int a, ...)
* | $(LITERAL '!<=')
* ;)
*/
ExpressionNode parseRelExpression(ExpressionNode shift = null)
ExpressionNode parseRelExpression(ExpressionNode shift)
{
mixin (traceEnterAndExit!(__FUNCTION__));
return parseLeftAssocBinaryExpression!(RelExpression, ShiftExpression,
tok!"<", tok!"<=", tok!">", tok!">=", tok!"!<>=", tok!"!<>",
tok!"<>", tok!"<>=", tok!"!>", tok!"!>=", tok!"!>=", tok!"!<",
@ -6238,7 +6243,11 @@ protected:
{
auto n = new ExpressionType;
static if (__traits(hasMember, ExpressionType, "operator"))
{
n.line = current.line;
n.column = current.column;
n.operator = advance().type;
}
else
advance();
n.left = node;

View File

@ -1,8 +1,99 @@
// Written in the D programming language
/**
* $(H2 Summary)
* This module contains a range-based _lexer generator.
*
* $(H2 Overview)
* The _lexer generator consists of a template mixin, $(LREF Lexer), along with
* several helper templates for generating such things as token identifiers.
*
* To write a _lexer using this API:
* $(OL
* $(LI Create the string array constants for your language.
* $(UL
* $(LI $(LINK2 #.StringConstants, String Constants))
* ))
* $(LI Create aliases for the various token and token identifier types
* specific to your language.
* $(UL
* $(LI $(LREF TokenIdType))
* $(LI $(LREF tokenStringRepresentation))
* $(LI $(LREF TokenStructure))
* $(LI $(LREF TokenId))
* ))
* $(LI Create a struct that mixes in the Lexer template mixin and
* implements the necessary functions.
* $(UL
* $(LI $(LREF Lexer))
* ))
* )
* Examples:
* $(UL
* $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
* $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
* )
* $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
* $(DL
* $(DT $(B staticTokens))
* $(DD A listing of the tokens whose exact value never changes and which cannot
* possibly be a token handled by the default token lexing function. The
* most common example of this kind of token is an operator such as
* $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
* $(DT $(B dynamicTokens))
* $(DD A listing of tokens whose value is variable, such as whitespace,
* identifiers, number literals, and string literals.)
* $(DT $(B possibleDefaultTokens))
* $(DD A listing of tokens that could possibly be one of the tokens handled by
* the default token handling function. A common example of this is
* a keyword such as $(D_STRING "for"), which looks like the beginning of
* the identifier $(D_STRING "fortunate"). isSeparating is called to
* determine if the character after the $(D_STRING 'r') separates the
* identifier, indicating that the token is $(D_STRING "for"), or if lexing
* should be turned over to the defaultTokenFunction.)
* $(DT $(B tokenHandlers))
* $(DD A mapping of prefixes to custom token handling function names. The
* generated _lexer will search for the even-index elements of this array,
* and then call the function whose name is the element immediately after the
* even-indexed element. This is used for lexing complex tokens whose prefix
* is fixed.)
* )
*
* Here are some example constants for a simple calculator _lexer:
* ---
* // There are a near infinite number of valid number literals, so numbers are
* // dynamic tokens.
* enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
*
* // The operators are always the same, and cannot start a numberLiteral, so
* // they are staticTokens
* enum string[] staticTokens = ["-", "+", "*", "/"];
*
* // In this simple example there are no keywords or other tokens that could
* // look like dynamic tokens, so this is blank.
* enum string[] possibleDefaultTokens = [];
*
* // If any whitespace character or digit is encountered, pass lexing over to
* // our custom handler functions. These will be demonstrated in an example
* // later on.
* enum string[] tokenHandlers = [
* "0", "lexNumber",
* "1", "lexNumber",
* "2", "lexNumber",
* "3", "lexNumber",
* "4", "lexNumber",
* "5", "lexNumber",
* "6", "lexNumber",
* "7", "lexNumber",
* "8", "lexNumber",
* "9", "lexNumber",
* " ", "lexWhitespace",
* "\n", "lexWhitespace",
* "\t", "lexWhitespace",
* "\r", "lexWhitespace"
* ];
* ---
*
* Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
@ -16,7 +107,12 @@ module stdx.lexer;
* unsigned integral type that is able to hold the value
* staticTokens.length + dynamicTokens.length. For example if there are 20
* static tokens, 30 dynamic tokens, and 10 possible default tokens, this
* template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.
* template will alias itself to ubyte, as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
* Examples:
* ---
* // In our calculator example this means that IdType is an alias for ubyte.
* alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
* ---
*/
template TokenIdType(alias staticTokens, alias dynamicTokens,
alias possibleDefaultTokens)
@ -32,7 +128,15 @@ template TokenIdType(alias staticTokens, alias dynamicTokens,
}
/**
* Looks up the string representation of the given token type.
* Looks up the string representation of the given token type. This is the
* opposite of the function of the TokenId template.
* Params: type = the token type identifier
* Examples:
* ---
* alias str = tokenStringRepresentation(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
* assert (str(tok!"*") == "*");
* ---
* See_also: $(LREF TokenId)
*/
string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property
{
@ -57,18 +161,18 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
* valid token type identifier)
* )
* In all cases this template will alias itself to a constant of type IdType.
* This template will fail at compile time if $(D_PARAM symbol) is not one of
* the staticTokens, dynamicTokens, or possibleDefaultTokens.
* Examples:
* ---
* enum string[] staticTokens = ["+", "-", "*", "/"];
* enum string[] dynamicTokens = ["number"];
* enum string[] possibleDefaultTokens = [];
* alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
* template tok(string symbol)
* {
* alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
* possibleDefaultTokens, symbol);
* }
* // num and plus are of type ubyte.
* IdType plus = tok!"+";
* IdType num = tok!"numberLiteral";
* ---
*/
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
@ -118,35 +222,49 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens,
/**
* The token that is returned by the lexer.
* Params:
* IDType = The D type of the "type" token type field.
* IdType = The D type of the "type" token type field.
* extraFields = A string containing D code for any extra fields that should
* be included in the token structure body. This string is passed
* directly to a mixin statement.
* Examples:
* ---
* // No extra struct fields are desired in this example, so leave it blank.
* alias Token = TokenStructure!(IdType, "");
* Token minusToken = Token(tok!"-");
* ---
*/
struct TokenStructure(IDType, string extraFields = "")
struct TokenStructure(IdType, string extraFields = "")
{
public:
/**
* == overload for the token type.
*/
bool opEquals(IDType type) const pure nothrow @safe
bool opEquals(IdType type) const pure nothrow @safe
{
return this.type == type;
}
/**
*
* Constructs a token from a token type.
* Params: type = the token type
*/
this(IDType type)
this(IdType type)
{
this.type = type;
}
/**
*
* Constructs a token.
* Params:
* type = the token type
* text = the text of the token, which may be null
* line = the line number at which this token occurs
* column = the column number at which this token occurs
* index = the byte offset from the beginning of the input at which this
* token occurs
*/
this(IDType type, string text, size_t line, size_t column, size_t index)
this(IdType type, string text, size_t line, size_t column, size_t index)
{
this.text = text;
this.line = line;
@ -156,39 +274,105 @@ public:
}
/**
*
* The _text of the token.
*/
string text;
/**
*
* The line number at which this token occurs.
*/
size_t line;
/**
*
* The column number at which this token occurs.
*/
size_t column;
/**
*
* The byte offset from the beginning of the input at which this token
* occurs.
*/
size_t index;
/**
*
* The token type.
*/
IDType type;
IdType type;
mixin (extraFields);
}
/**
* The implementation of the _lexer is contained within this mixin template.
* To use it, this template should be mixed in to a struct that represents the
* _lexer for your language. This struct should implement the following methods:
* $(UL
* $(LI popFront, which should call this mixin's _popFront() and
* additionally perform any token filtering or shuffling you deem
* necessary. For example, you can implement popFront to skip comment
* tokens.)
* $(LI A function that serves as the default token lexing function. For
* most languages this will be the identifier lexing function.)
* $(LI A function that is able to determine if an identifier/keyword has
* come to an end. This function must return $(D_KEYWORD bool) and take
* a single $(D_KEYWORD size_t) argument representing the number of
* bytes to skip over before looking for a separating character.)
* $(LI Any functions referred to in the tokenHandlers template parameter.
* These functions must be marked $(D_KEYWORD pure nothrow), take no
* arguments, and return a token)
* $(LI A constructor that initializes the range field as well as calls
* popFront() exactly once (to initialize the _front field).)
* )
* Examples:
* ---
* struct CalculatorLexer
* {
* mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
* staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
*
* this (ubyte[] bytes)
* {
* this.range = LexerRange(bytes);
* popFront();
* }
*
* void popFront() pure
* {
* _popFront();
* }
*
* Token lexNumber() pure nothrow @safe
* {
* ...
* }
*
* Token lexWhitespace() pure nothrow @safe
* {
* ...
* }
*
* Token defaultTokenFunction() pure nothrow @safe
* {
* // There is no default token in the example calculator language, so
* // this is always an error.
* range.popFront();
* return Token(tok!"");
* }
*
* bool isSeparating(size_t offset) pure nothrow @safe
* {
* // For this example language, always return true.
* return true;
* }
* }
* ---
*/
mixin template Lexer(IDType, Token, alias defaultTokenFunction,
alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
alias pseudoTokenHandlers, alias possibleDefaultTokens)
alias tokenHandlers, alias possibleDefaultTokens)
{
static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must"
static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
~ " have a corresponding handler function name.");
static string generateMask(const ubyte[] arr)
@ -214,7 +398,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
import std.string;
import std.range;
string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2));
string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2));
string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
string code;
for (size_t i = 0; i < allTokens.length; i++)
@ -240,7 +424,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
if (pseudoTokens.countUntil(tokens[0]) >= 0)
{
return " return "
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
~ "();\n";
}
else if (staticTokens.countUntil(tokens[0]) >= 0)
@ -251,7 +435,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
else if (pseudoTokens.countUntil(tokens[0]) >= 0)
{
return " return "
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
~ "();\n";
}
}
@ -271,14 +455,14 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
if (token.length <= 8)
{
code ~= " return "
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
~ "();\n";
}
else
{
code ~= " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n";
code ~= " return "
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
~ "();\n";
}
}
@ -325,16 +509,23 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
return code;
}
/**
* Implements the range primitive front().
*/
ref const(Token) front() pure nothrow const @property
{
return _front;
}
void _popFront() pure
{
_front = advance();
}
/**
* Implements the range primitive empty().
*/
bool empty() pure const nothrow @property
{
return _front.type == tok!"\0";
@ -359,9 +550,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
return retVal;
}
/**
* This only exists because the real array() can't be called at compile-time
*/
// This only exists because the real array() can't be called at compile-time
static string[] stupidToArray(R)(R range)
{
string[] retVal;
@ -397,13 +586,30 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
}
}
/**
* The lexer input.
*/
LexerRange range;
/**
* The token that is currently at the front of the range.
*/
Token _front;
}
/**
* Range structure that wraps the _lexer's input.
*/
struct LexerRange
{
/**
* Params:
* bytes = the _lexer input
* index = the initial offset from the beginning of $(D_PARAM bytes)
* column = the initial column number
* line = the initial line number
*/
this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
{
this.bytes = bytes;
@ -412,31 +618,52 @@ struct LexerRange
this.line = line;
}
/**
* Returns: a mark at the current position that can then be used with slice.
*/
size_t mark() const nothrow pure @safe
{
return index;
}
/**
* Sets the range to the given position
* Params: m = the position to seek to
*/
void seek(size_t m) nothrow pure @safe
{
index = m;
}
/**
* Returns a slice of the input byte array between the given mark and the
* current position.
* Params: m = the beginning index of the slice to return
*/
const(ubyte)[] slice(size_t m) const nothrow pure @safe
{
return bytes[m .. index];
}
/**
 * Implements the range primitive _empty.
 * Returns: true when the current position has reached the end of the input.
 */
bool empty() const nothrow pure @safe
{
return index >= bytes.length;
}
/**
 * Implements the range primitive _front.
 * Returns: the byte at the current position. Not checked against empty();
 * calling this on an empty range relies on the array's own bounds check.
 */
ubyte front() const nothrow pure @safe
{
return bytes[index];
}
/**
* Returns: the current item as well as the items $(D_PARAM p) items ahead.
*/
const(ubyte)[] peek(size_t p) const nothrow pure @safe
{
return index + p + 1 > bytes.length
@ -444,48 +671,79 @@ struct LexerRange
: bytes[index .. index + p + 1];
}
/**
 * Returns: the byte at the given offset ahead of the current position,
 * without advancing the range. Use canPeek() first to check bounds.
 */
ubyte peekAt(size_t offset) const nothrow pure @safe
{
return bytes[index + offset];
}
/**
 * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead, i.e.
 * index + p is still a valid position within the input.
 */
bool canPeek(size_t p) const nothrow pure @safe
{
return index + p < bytes.length;
}
/**
 * Implements the range primitive _popFront.
 * Advances the position and column by one. Does not detect newlines;
 * callers must invoke incrementLine() when a line break is consumed.
 */
void popFront() pure nothrow @safe
{
index++;
column++;
}
/**
 * Implements the algorithm _popFrontN more efficiently: advances position
 * and column by n in O(1) instead of popping one byte at a time. As with
 * popFront(), line breaks within the skipped bytes are not detected.
 */
void popFrontN(size_t n) pure nothrow @safe
{
index += n;
column += n;
}
/**
 * Increments the range's line number and resets the column counter to 1
 * (columns are 1-based, matching the constructor's default).
 */
void incrementLine() pure nothrow @safe
{
column = 1;
line++;
}
/**
* The input _bytes.
*/
const(ubyte)[] bytes;
/**
* The range's current position.
*/
size_t index;
/**
* The current _column number.
*/
size_t column;
/**
* The current _line number.
*/
size_t line;
}
/**
* The string cache should be used within lexer implementations for several
* reasons:
* $(UL
* $(LI Reducing memory consumption.)
* $(LI Increasing performance in token comparisons)
* $(LI Correctly creating immutable token text if the lexing source is not
* immutable)
* )
* The string cache implements a map/set for strings. Placing a string in the
* cache returns an identifier that can be used to instantly access the stored
* string. It is then possible to simply compare these indexes instead of
* performing full string comparisons when comparing the string content of
* dynamic tokens. The string cache also handles its own memory, so that mutable
* ubyte[] to lexers can still have immutable string fields in their tokens.
* Because the string cache also performs de-duplication it is possible to
* drastically reduce the memory usage of a lexer.
*/
struct StringCache
{
@ -493,7 +751,10 @@ public:
@disable this();
this(size_t bucketCount = defaultBucketCount)
/**
* Params: bucketCount = the initial number of buckets.
*/
this(size_t bucketCount)
{
buckets = new Item*[bucketCount];
}
@ -512,6 +773,9 @@ public:
return get(cache(bytes));
}
/**
* Equivalent to calling cache() and get().
*/
string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe
{
return get(cache(bytes, hash));
@ -536,6 +800,11 @@ public:
return cache(bytes, hash);
}
/**
 * Caches a string as above, but uses the given hash code instead of
 * calculating one itself. Using this alongside hashStep() can reduce the
 * amount of work necessary when lexing dynamic tokens.
 */
size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
in
{
@ -583,11 +852,21 @@ public:
writeln("rehashes: ", rehashCount);
}
/**
 * Incremental hashing: folds one more byte into a running hash, so that a
 * string's hash can be built up while lexing instead of in a second pass.
 * Params:
 * b = the byte to add to the hash
 * h = the hash that has been calculated so far
 * Returns: the new hash code for the string.
 */
static uint hashStep(ubyte b, uint h) pure nothrow @safe
{
return (h ^ sbox[b]) * 3;
}
/**
* The default bucket count for the string cache.
*/
static enum defaultBucketCount = 2048;
private:

483
stdx/lexer.html Normal file
View File

@ -0,0 +1,483 @@
<h1>stdx.lexer</h1> <!-- Generated by Ddoc from lexer.d -->
This module contains a range-based lexer generator.
<p></p>
The lexer generator consists of a template mixin, Lexer, along with several
helper templates for generating such things as token identifiers.
<p></p>
To generate a lexer using this API, several constants must be supplied:
<dl><dt>staticTokens</dt>
<dd>A listing of the tokens whose exact value never changes and which cannot
possibly be a token handled by the default token lexing function. The
most common example of this kind of token is an operator such as "*", or
"-" in a programming language.</dd>
<dt>dynamicTokens</dt>
<dd>A listing of tokens whose value is variable, such as whitespace,
identifiers, number literals, and string literals.</dd>
<dt>possibleDefaultTokens</dt>
<dd>A listing of tokens that could possibly be one of the tokens handled by
the default token handling function. A common example of this is
a keyword such as <span class="d_string">"for"</span>, which looks like the beginning of
the identifier <span class="d_string">"fortunate"</span>. isSeparating is called to
determine if the character after the <span class="d_string">'r'</span> separates the
identifier, indicating that the token is <span class="d_string">"for"</span>, or if lexing
should be turned over to the defaultTokenFunction.</dd>
<dt>tokenHandlers</dt>
<dd>A mapping of prefixes to custom token handling function names. The
generated lexer will search for the even-index elements of this array,
and then call the function whose name is the element immediately after the
even-indexed element. This is used for lexing complex tokens whose prefix
is fixed.</dd>
</dl>
<p></p>
Here are some example constants for a simple calculator lexer:
<pre class="d_code"><span class="d_comment">// There are a near infinite number of valid number literals, so numbers are
</span><span class="d_comment">// dynamic tokens.
</span><span class="d_keyword">enum</span> string[] dynamicTokens = [<span class="d_string">"numberLiteral"</span>, <span class="d_string">"whitespace"</span>];
<span class="d_comment">// The operators are always the same, and cannot start a numberLiteral, so
</span><span class="d_comment">// they are staticTokens
</span><span class="d_keyword">enum</span> string[] staticTokens = [<span class="d_string">"-"</span>, <span class="d_string">"+"</span>, <span class="d_string">"*"</span>, <span class="d_string">"/"</span>];
<span class="d_comment">// In this simple example there are no keywords or other tokens that could
</span><span class="d_comment">// look like dynamic tokens, so this is blank.
</span><span class="d_keyword">enum</span> string[] possibleDefaultTokens = [];
<span class="d_comment">// If any whitespace character or digit is encountered, pass lexing over to
</span><span class="d_comment">// our custom handler functions. These will be demonstrated in an example
</span><span class="d_comment">// later on.
</span><span class="d_keyword">enum</span> string[] tokenHandlers = [
<span class="d_string">"0"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"1"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"2"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"3"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"4"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"5"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"6"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"7"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"8"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">"9"</span>, <span class="d_string">"lexNumber"</span>,
<span class="d_string">" "</span>, <span class="d_string">"lexWhitespace"</span>,
<span class="d_string">"\n"</span>, <span class="d_string">"lexWhitespace"</span>,
<span class="d_string">"\t"</span>, <span class="d_string">"lexWhitespace"</span>,
<span class="d_string">"\r"</span>, <span class="d_string">"lexWhitespace"</span>
];
</pre>
<p></p>
<b>Examples:</b><br><ul><li>A lexer for D is available <a href="https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d">here</a>.</li>
<li>A lexer for Lua is available <a href="https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d">here</a>.</li>
</ul>
<p></p>
<b>License:</b><br><a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>
<p></p>
<b>Authors:</b><br>Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
<p></p>
<b>Source:</b><br>
<a href="https://github.com/D-Programming-Language/phobos/blob/master/std/lexer.d">std/lexer.d</a><p></p>
<dl><dt class="d_decl"><a name=".TokenIdType"></a>template <a name="TokenIdType"></a><span class="ddoc_psymbol">TokenIdType</span>(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)</dt>
<dd>Template for determining the type used for a token type. Selects the smallest
unsigned integral type that is able to hold the value
staticTokens.length + dynamicTokens.length. For example if there are 20
static tokens, 30 dynamic tokens, and 10 possible default tokens, this
template will alias itself to ubyte, as 20 + 30 + 10 &lt; <span class="d_keyword">ubyte</span>.max.
<p></p>
<b>Examples:</b><br><pre class="d_code"><span class="d_comment">// In our calculator example this means that IdType is an alias for ubyte.
</span><span class="d_keyword">alias</span> IdType = <span class="d_psymbol">TokenIdType</span>!(staticTokens, dynamicTokens, possibleDefaultTokens);
</pre>
<p></p>
</dd>
<dt class="d_decl"><a name=".tokenStringRepresentation"></a>@property string <a name="tokenStringRepresentation"></a><span class="ddoc_psymbol">tokenStringRepresentation</span>(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType <i>type</i>);
</dt>
<dd>Looks up the string representation of the given token type. This is the
opposite of the function of the TokenId template.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType type</td>
<td valign=top>the token type identifier</td></tr>
</table><p></p>
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">alias</span> str = <span class="d_psymbol">tokenStringRepresentation</span>(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
<span class="d_keyword">assert</span> (str(tok!<span class="d_string">"*"</span>) == <span class="d_string">"*"</span>);
</pre>
<p></p>
<b>See Also:</b><br>TokenId<p></p>
</dd>
<dt class="d_decl"><a name=".TokenId"></a>template <a name="TokenId"></a><span class="ddoc_psymbol">TokenId</span>(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens, string symbol)</dt>
<dd>Generates the token type identifier for the given symbol. There are two
special cases:
<ul> <li>If symbol is "", then the token identifier will be 0</li>
<li>If symbol is "\0", then the token identifier will be the maximum
valid token type identifier</li>
</ul>
In all cases this template will alias itself to a constant of type IdType.
This template will fail at compile time if <span class="d_param">symbol</span> is not one of
the staticTokens, dynamicTokens, or possibleDefaultTokens.
<p></p>
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">template</span> tok(string symbol)
{
<span class="d_keyword">alias</span> tok = <span class="d_psymbol">TokenId</span>!(IdType, staticTokens, dynamicTokens,
possibleDefaultTokens, symbol);
}
<span class="d_comment">// num and plus are of type ubyte.
</span>IdType plus = tok!<span class="d_string">"+"</span>;
IdType num = tok!<span class="d_string">"numberLiteral"</span>;
</pre>
<p></p>
</dd>
<dt class="d_decl"><a name=".TokenStructure"></a>struct <a name="TokenStructure"></a><span class="ddoc_psymbol">TokenStructure</span>(IdType, string extraFields = "");
</dt>
<dd>The token that is returned by the lexer.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType</td>
<td valign=top>The D type of the "type" token type field.</td></tr>
<tr><td valign=top>extraFields</td>
<td valign=top>A string containing D code for any extra fields that should
be included in the token structure body. This string is passed
directly to a mixin statement.</td></tr>
</table><p></p>
<b>Examples:</b><br><pre class="d_code"><span class="d_comment">// No extra struct fields are desired in this example, so leave it blank.
</span><span class="d_keyword">alias</span> Token = <span class="d_psymbol">TokenStructure</span>!(IdType, <span class="d_string">""</span>);
Token minusToken = Token(tok!<span class="d_string">"-"</span>);
</pre>
<p></p>
<dl><dt class="d_decl"><a name=".opEquals"></a>const pure nothrow @safe bool <a name="opEquals"></a><span class="ddoc_psymbol">opEquals</span>(IdType <i>type</i>);
</dt>
<dd>== overload for the the token <i>type</i>.<p></p>
</dd>
<dt class="d_decl"><a name=".this"></a> this(IdType <i>type</i>);
</dt>
<dd>Constructs a token from a token <i>type</i>.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType <i>type</i></td>
<td valign=top>the token <i>type</i></td></tr>
</table><p></p>
</dd>
<dt class="d_decl"><a name=".this"></a> this(IdType <i>type</i>, string <i>text</i>, size_t <i>line</i>, size_t <i>column</i>, size_t <i>index</i>);
</dt>
<dd>Constructs a token.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType <i>type</i></td>
<td valign=top>the token <i>type</i></td></tr>
<tr><td valign=top>string <i>text</i></td>
<td valign=top>the <i>text</i> of the token, which may be <b>null</b></td></tr>
<tr><td valign=top>size_t <i>line</i></td>
<td valign=top>the <i>line</i> number at which this token occurs</td></tr>
<tr><td valign=top>size_t <i>column</i></td>
<td valign=top>the <i>column</i> number at which this token occurs</td></tr>
<tr><td valign=top>size_t <i>index</i></td>
<td valign=top>the byte offset from the beginning of the input at which this
token occurs</td></tr>
</table><p></p>
</dd>
<dt class="d_decl"><a name=".text"></a>string <a name="text"></a><span class="ddoc_psymbol">text</span>;
</dt>
<dd>The <a name="text"></a><span class="ddoc_psymbol">text</span> of the token.<p></p>
</dd>
<dt class="d_decl"><a name=".line"></a>size_t <a name="line"></a><span class="ddoc_psymbol">line</span>;
</dt>
<dd>The <a name="line"></a><span class="ddoc_psymbol">line</span> number at which this token occurs.<p></p>
</dd>
<dt class="d_decl"><a name=".column"></a>size_t <a name="column"></a><span class="ddoc_psymbol">column</span>;
</dt>
<dd>The column number at which this token occurs.<p></p>
</dd>
<dt class="d_decl"><a name=".index"></a>size_t <a name="index"></a><span class="ddoc_psymbol">index</span>;
</dt>
<dd>The byte offset from the beginning of the input at which this token
occurs.<p></p>
</dd>
<dt class="d_decl"><a name=".type"></a>IdType <a name="type"></a><span class="ddoc_psymbol">type</span>;
</dt>
<dd>The token <a name="type"></a><span class="ddoc_psymbol">type</span>.<p></p>
</dd>
</dl>
</dd>
<dt class="d_decl"><a name=".Lexer"></a>template <a name="Lexer"></a><span class="ddoc_psymbol">Lexer</span>(IDType, Token, alias defaultTokenFunction, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, alias tokenHandlers, alias possibleDefaultTokens)</dt>
<dd>The implementation of the lexer is contained within this mixin template.
To use it, this template should be mixed in to a struct that represents the
lexer for your language. This struct should implement the following methods:
<ul> <li>popFront, which should call this mixin's popFront() and
additionally perform any token filtering or shuffling you deem
necessary. For example, you can implement popFront to skip comment or
whitespace tokens.</li>
<li>A function that serves as the default token lexing function. For
most languages this will be the identifier lexing function.</li>
<li>A function that is able to determine if an identifier/keyword has
come to an end. This function must return <span class="d_keyword">bool</span> and take
a single <span class="d_keyword">size_t</span> argument representing the number of
bytes to skip over before looking for a separating character.</li>
<li>Any functions referred to in the tokenHandlers template parameter.
These functions must be marked <span class="d_keyword">pure nothrow</span>, take no
arguments, and return a token</li>
<li>A constructor that initializes the range field as well as calls
popFront() exactly once (to initialize the front field).</li>
</ul>
<p></p>
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">struct</span> CalculatorLexer
{
<span class="d_keyword">mixin</span> <span class="d_psymbol">Lexer</span>!(IdType, Token, defaultTokenFunction, isSeparating,
staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
<span class="d_keyword">this</span> (<span class="d_keyword">ubyte</span>[] bytes)
{
<span class="d_keyword">this</span>.range = LexerRange(bytes);
popFront();
}
<span class="d_keyword">void</span> popFront() <span class="d_keyword">pure</span>
{
_popFront();
}
Token lexNumber() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
{
...
}
Token lexWhitespace() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
{
...
}
Token defaultTokenFunction() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
{
<span class="d_comment">// There is no default token in the example calculator language, so
</span> <span class="d_comment">// this is always an error.
</span> range.popFront();
<span class="d_keyword">return</span> Token(tok!<span class="d_string">""</span>);
}
<span class="d_keyword">bool</span> isSeparating(size_t offset) <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
{
<span class="d_comment">// For this example language, always return true.
</span> <span class="d_keyword">return</span> <span class="d_keyword">true</span>;
}
}
</pre>
<p></p>
<dl><dt class="d_decl"><a name=".front"></a>const pure nothrow @property const(Token) <a name="front"></a><span class="ddoc_psymbol">front</span>();
</dt>
<dd>Implements the range primitive <a name="front"></a><span class="ddoc_psymbol">front</span>().<p></p>
</dd>
<dt class="d_decl"><a name=".empty"></a>const pure nothrow @property bool <a name="empty"></a><span class="ddoc_psymbol">empty</span>();
</dt>
<dd>Implements the range primitive <a name="empty"></a><span class="ddoc_psymbol">empty</span>().<p></p>
</dd>
<dt class="d_decl"><a name=".range"></a>LexerRange <a name="range"></a><span class="ddoc_psymbol">range</span>;
</dt>
<dd>The lexer input.<p></p>
</dd>
<dt class="d_decl"><a name="._front"></a>Token <a name="_front"></a><span class="ddoc_psymbol">_front</span>;
</dt>
<dd>The token that is currently at the front of the range.<p></p>
</dd>
</dl>
</dd>
<dt class="d_decl"><a name=".LexerRange"></a>struct <a name="LexerRange"></a><span class="ddoc_psymbol">LexerRange</span>;
</dt>
<dd>Range structure that wraps the lexer's input.<p></p>
<dl><dt class="d_decl"><a name=".LexerRange.this"></a>pure nothrow @safe this(const(ubyte)[] <i>bytes</i>, size_t <i>index</i> = 0, size_t <i>column</i> = 1, size_t <i>line</i> = 1);
</dt>
<dd><b>Parameters:</b><table class=parms><tr><td valign=top>const(ubyte)[] <i>bytes</i></td>
<td valign=top>the lexer input</td></tr>
<tr><td valign=top>size_t <i>index</i></td>
<td valign=top>the initial offset from the beginning of <span class="d_param"><i>bytes</i></span></td></tr>
<tr><td valign=top>size_t <i>column</i></td>
<td valign=top>the initial <i>column</i> number</td></tr>
<tr><td valign=top>size_t <i>line</i></td>
<td valign=top>the initial <i>line</i> number</td></tr>
</table><p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.mark"></a>const pure nothrow @safe size_t <a name="mark"></a><span class="ddoc_psymbol">mark</span>();
</dt>
<dd><b>Returns:</b><br>a <a name="mark"></a><span class="ddoc_psymbol">mark</span> at the current position that can then be used with slice.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.seek"></a>pure nothrow @safe void <a name="seek"></a><span class="ddoc_psymbol">seek</span>(size_t <i>m</i>);
</dt>
<dd>Sets the range to the given position
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>m</i></td>
<td valign=top>the position to <a name="seek"></a><span class="ddoc_psymbol">seek</span> to</td></tr>
</table><p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.slice"></a>const pure nothrow @safe const(ubyte)[] <a name="slice"></a><span class="ddoc_psymbol">slice</span>(size_t <i>m</i>);
</dt>
<dd>Returns a <a name="slice"></a><span class="ddoc_psymbol">slice</span> of the input byte array between the given mark and the
current position.
Params: <i>m</i> = the beginning index of the <a name="slice"></a><span class="ddoc_psymbol">slice</span> to return<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.empty"></a>const pure nothrow @safe bool <a name="empty"></a><span class="ddoc_psymbol">empty</span>();
</dt>
<dd>Implements the range primitive empty.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.front"></a>const pure nothrow @safe ubyte <a name="front"></a><span class="ddoc_psymbol">front</span>();
</dt>
<dd>Implements the range primitive front.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.peek"></a>const pure nothrow @safe const(ubyte)[] <a name="peek"></a><span class="ddoc_psymbol">peek</span>(size_t <i>p</i>);
</dt>
<dd><b>Returns:</b><br>the current item as well as the items <span class="d_param"><i>p</i></span> items ahead.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.peekAt"></a>const pure nothrow @safe ubyte <a name="peekAt"></a><span class="ddoc_psymbol">peekAt</span>(size_t <i>offset</i>);
</dt>
<dd><p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.canPeek"></a>const pure nothrow @safe bool <a name="canPeek"></a><span class="ddoc_psymbol">canPeek</span>(size_t <i>p</i>);
</dt>
<dd><b>Returns:</b><br><b>true</b> if it is possible to peek <span class="d_param"><i>p</i></span> bytes ahead.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.popFront"></a>pure nothrow @safe void <a name="popFront"></a><span class="ddoc_psymbol">popFront</span>();
</dt>
<dd>Implements the range primitive popFront.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.popFrontN"></a>pure nothrow @safe void <a name="popFrontN"></a><span class="ddoc_psymbol">popFrontN</span>(size_t <i>n</i>);
</dt>
<dd>Implements the algorithm popFrontN more efficiently.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.incrementLine"></a>pure nothrow @safe void <a name="incrementLine"></a><span class="ddoc_psymbol">incrementLine</span>();
</dt>
<dd>Increments the range's line number and resets the column counter.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.bytes"></a>const(ubyte)[] <a name="bytes"></a><span class="ddoc_psymbol">bytes</span>;
</dt>
<dd>The input bytes.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.index"></a>size_t <a name="index"></a><span class="ddoc_psymbol">index</span>;
</dt>
<dd>The range's current position.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.column"></a>size_t <a name="column"></a><span class="ddoc_psymbol">column</span>;
</dt>
<dd>The current column number.<p></p>
</dd>
<dt class="d_decl"><a name=".LexerRange.line"></a>size_t <a name="line"></a><span class="ddoc_psymbol">line</span>;
</dt>
<dd>The current line number.<p></p>
</dd>
</dl>
</dd>
<dt class="d_decl"><a name=".StringCache"></a>struct <a name="StringCache"></a><span class="ddoc_psymbol">StringCache</span>;
</dt>
<dd>The string cache implements a map/set for strings. Placing a string in the
cache returns an identifier that can be used to instantly access the stored
string. It is then possible to simply compare these indexes instead of
performing full string comparisons when comparing the string content of
dynamic tokens. The string cache also handles its own memory, so that mutable
ubyte[] to lexers can still have immutable string fields in their tokens.
Because the string cache also performs de-duplication it is possible to
drastically reduce the memory usage of a lexer.<p></p>
<dl><dt class="d_decl"><a name=".StringCache.this"></a> this(size_t <i>bucketCount</i>);
</dt>
<dd><b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>bucketCount</i></td>
<td valign=top>the initial number of buckets.</td></tr>
</table><p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.cacheGet"></a>pure nothrow @safe string <a name="cacheGet"></a><span class="ddoc_psymbol">cacheGet</span>(const(ubyte[]) <i>bytes</i>);
</dt>
<dd>Equivalent to calling cache() and get().
<pre class="d_code">StringCache cache;
<span class="d_keyword">ubyte</span>[] str = ['a', 'b', 'c'];
string s = cache.get(cache.cache(str));
<span class="d_keyword">assert</span>(s == <span class="d_string">"abc"</span>);
</pre>
<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.cacheGet"></a>pure nothrow @safe string <a name="cacheGet"></a><span class="ddoc_psymbol">cacheGet</span>(const(ubyte[]) <i>bytes</i>, uint <i>hash</i>);
</dt>
<dd>Equivalent to calling cache() and get().<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.cache"></a>pure nothrow @safe size_t <a name="cache"></a><span class="ddoc_psymbol">cache</span>(const(ubyte)[] <i>bytes</i>);
</dt>
<dd>Caches a string.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>const(ubyte)[] <i>bytes</i></td>
<td valign=top>the string to <a name="cache"></a><span class="ddoc_psymbol">cache</span></td></tr>
</table><p></p>
<b>Returns:</b><br>A key that can be used to retrieve the cached string
<p></p>
<b>Examples:</b><br><pre class="d_code">StringCache <span class="d_psymbol">cache</span>;
<span class="d_keyword">ubyte</span>[] <span class="d_param">bytes</span> = ['a', 'b', 'c'];
size_t first = <span class="d_psymbol">cache</span>.<span class="d_psymbol">cache</span>(<span class="d_param">bytes</span>);
size_t second = <span class="d_psymbol">cache</span>.<span class="d_psymbol">cache</span>(<span class="d_param">bytes</span>);
<span class="d_keyword">assert</span> (first == second);
</pre>
<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.cache"></a>pure nothrow @safe size_t <a name="cache"></a><span class="ddoc_psymbol">cache</span>(const(ubyte)[] <i>bytes</i>, uint <i>hash</i>);
</dt>
<dd>Caches a string as above, but uses the given hash code instead of
calculating one itself. Using this alongside hashStep() can reduce the
amount of work necessary when lexing dynamic tokens.<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.get"></a>const pure nothrow @safe string <a name="get"></a><span class="ddoc_psymbol">get</span>(size_t <i>index</i>);
</dt>
<dd>Gets a cached string based on its key.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>index</i></td>
<td valign=top>the key</td></tr>
</table><p></p>
<b>Returns:</b><br>the cached string<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.hashStep"></a>static pure nothrow @safe uint <a name="hashStep"></a><span class="ddoc_psymbol">hashStep</span>(ubyte <i>b</i>, uint <i>h</i>);
</dt>
<dd>Incremental hashing.
<p></p>
<b>Parameters:</b><table class=parms><tr><td valign=top>ubyte <i>b</i></td>
<td valign=top>the byte to add to the hash</td></tr>
<tr><td valign=top>uint <i>h</i></td>
<td valign=top>the hash that has been calculated so far</td></tr>
</table><p></p>
<b>Returns:</b><br>the new hash code for the string.<p></p>
</dd>
<dt class="d_decl"><a name=".StringCache.defaultBucketCount"></a>static int <a name="defaultBucketCount"></a><span class="ddoc_psymbol">defaultBucketCount</span>;
</dt>
<dd>The default bucket count for the string cache.<p></p>
</dd>
</dl>
</dd>
</dl>
<table width=100%><tr><td><hr align="left" size="8" width="100%" color="maroon" /></td><td width=5%><a href=#top>[top]</a></td></tr></table>

BIN
stdx/lexer.o Normal file

Binary file not shown.