diff --git a/analysis/del.d b/analysis/del.d
new file mode 100644
index 0000000..8cd3646
--- /dev/null
+++ b/analysis/del.d
@@ -0,0 +1,29 @@
+// Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.del;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+/**
+ * Checks for use of the deprecated "delete" keyword
+ */
+class DeleteCheck : BaseAnalyzer
+{
+ alias visit = BaseAnalyzer.visit;
+
+ this(string fileName)
+ {
+ super(fileName);
+ }
+
+ override void visit(DeleteExpression d)
+ {
+ addErrorMessage(d.line, d.column, "Avoid using the deprecated delete keyword");
+ d.accept(this);
+ }
+}
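
For reference, a short snippet of the kind of code this check flags. The class and function names below are illustrative only and are not part of this change:

---
class Resource {}

void release(Resource r)
{
    // DeleteCheck reports "Avoid using the deprecated delete keyword" at the
    // line and column recorded on the DeleteExpression node.
    delete r;

    // The non-deprecated alternative is destroy(r), leaving memory reclamation
    // to the garbage collector.
}
---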
diff --git a/analysis/fish.d b/analysis/fish.d
new file mode 100644
index 0000000..e1790d2
--- /dev/null
+++ b/analysis/fish.d
@@ -0,0 +1,38 @@
+// Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.fish;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+/**
+ * Checks for use of the deprecated floating point comparison operators.
+ */
+class FloatOperatorCheck : BaseAnalyzer
+{
+ alias visit = BaseAnalyzer.visit;
+
+ this(string fileName)
+ {
+ super(fileName);
+ }
+
+ override void visit(RelExpression r)
+ {
+ if (r.operator == tok!"<>"
+ || r.operator == tok!"!<>"
+ || r.operator == tok!"!>"
+ || r.operator == tok!"!<"
+ || r.operator == tok!"!<>="
+ || r.operator == tok!"!>="
+ || r.operator == tok!"!<=")
+ {
+ addErrorMessage(r.line, r.column, "Avoid using the deprecated floating-point operators");
+ }
+ r.accept(this);
+ }
+}
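
Likewise, a snippet that triggers this check; the function below is illustrative only:

---
bool equalOrUnordered(double a, double b)
{
    // FloatOperatorCheck reports "Avoid using the deprecated floating-point operators".
    // a !<> b is true when a and b compare equal or are unordered (either is NaN);
    // the non-deprecated spelling is !(a < b || a > b), or an explicit isNaN test.
    return a !<> b;
}
---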
diff --git a/analysis/run.d b/analysis/run.d
index 19fa961..d55a7d1 100644
--- a/analysis/run.d
+++ b/analysis/run.d
@@ -15,6 +15,8 @@ import analysis.base;
import analysis.style;
import analysis.enumarrayliteral;
import analysis.pokemon;
+import analysis.del;
+import analysis.fish;
void messageFunction(string fileName, size_t line, size_t column, string message,
bool isError)
@@ -63,8 +65,14 @@ void analyze(File output, string[] fileNames, bool staticAnalyze = true)
auto pokemon = new PokemonExceptionCheck(fileName);
pokemon.visit(m);
+ auto del = new DeleteCheck(fileName);
+ del.visit(m);
+
+ auto fish = new FloatOperatorCheck(fileName);
+ fish.visit(m);
+
foreach (message; sort(chain(enums.messages, style.messages,
- pokemon.messages).array))
+ pokemon.messages, del.messages, fish.messages).array))
{
writeln(message);
}
diff --git a/main.d b/main.d
index 4427959..3b3103a 100644
--- a/main.d
+++ b/main.d
@@ -109,10 +109,11 @@ int main(string[] args)
}
else if (tokenDump)
{
+ writeln("text blank\tindex\tline\tcolumn\tcomment");
foreach (token; tokens)
{
- writeln("«", token.text is null ? str(token.type) : token.text,
- "» ", token.text !is null, " ", token.index, " ", token.line, " ", token.column, " ",
+ writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
+ token.text !is null, token.index, token.line, token.column,
token.comment);
}
return 0;
diff --git a/stdx/d/ast.d b/stdx/d/ast.d
index dc21ae3..7dd5e26 100644
--- a/stdx/d/ast.d
+++ b/stdx/d/ast.d
@@ -31,6 +31,58 @@ import std.string;
abstract class ASTVisitor
{
public:
+
+ void visit(ExpressionNode n)
+ {
+ if (cast(AddExpression) n) visit(cast(AddExpression) n);
+ else if (cast(AndAndExpression) n) visit(cast(AndAndExpression) n);
+ else if (cast(AndExpression) n) visit(cast(AndExpression) n);
+ else if (cast(AsmAddExp) n) visit(cast(AsmAddExp) n);
+ else if (cast(AsmAndExp) n) visit(cast(AsmAndExp) n);
+ else if (cast(AsmEqualExp) n) visit(cast(AsmEqualExp) n);
+ else if (cast(AsmLogAndExp) n) visit(cast(AsmLogAndExp) n);
+ else if (cast(AsmLogOrExp) n) visit(cast(AsmLogOrExp) n);
+ else if (cast(AsmMulExp) n) visit(cast(AsmMulExp) n);
+ else if (cast(AsmOrExp) n) visit(cast(AsmOrExp) n);
+ else if (cast(AsmRelExp) n) visit(cast(AsmRelExp) n);
+ else if (cast(AsmShiftExp) n) visit(cast(AsmShiftExp) n);
+ else if (cast(AssertExpression) n) visit(cast(AssertExpression) n);
+ else if (cast(AssignExpression) n) visit(cast(AssignExpression) n);
+ else if (cast(CmpExpression) n) visit(cast(CmpExpression) n);
+ else if (cast(DeleteExpression) n) visit(cast(DeleteExpression) n);
+ else if (cast(EqualExpression) n) visit(cast(EqualExpression) n);
+ else if (cast(Expression) n) visit(cast(Expression) n);
+ else if (cast(FunctionCallExpression) n) visit(cast(FunctionCallExpression) n);
+ else if (cast(FunctionLiteralExpression) n) visit(cast(FunctionLiteralExpression) n);
+ else if (cast(IdentityExpression) n) visit(cast(IdentityExpression) n);
+ else if (cast(ImportExpression) n) visit(cast(ImportExpression) n);
+ else if (cast(IndexExpression) n) visit(cast(IndexExpression) n);
+ else if (cast(InExpression) n) visit(cast(InExpression) n);
+ else if (cast(IsExpression) n) visit(cast(IsExpression) n);
+ else if (cast(LambdaExpression) n) visit(cast(LambdaExpression) n);
+ else if (cast(MixinExpression) n) visit(cast(MixinExpression) n);
+ else if (cast(MulExpression) n) visit(cast(MulExpression) n);
+ else if (cast(NewAnonClassExpression) n) visit(cast(NewAnonClassExpression) n);
+ else if (cast(NewExpression) n) visit(cast(NewExpression) n);
+ else if (cast(OrExpression) n) visit(cast(OrExpression) n);
+ else if (cast(OrOrExpression) n) visit(cast(OrOrExpression) n);
+ else if (cast(PostIncDecExpression) n) visit(cast(PostIncDecExpression) n);
+ else if (cast(PowExpression) n) visit(cast(PowExpression) n);
+ else if (cast(PragmaExpression) n) visit(cast(PragmaExpression) n);
+ else if (cast(PreIncDecExpression) n) visit(cast(PreIncDecExpression) n);
+ else if (cast(PrimaryExpression) n) visit(cast(PrimaryExpression) n);
+ else if (cast(RelExpression) n) visit(cast(RelExpression) n);
+ else if (cast(ShiftExpression) n) visit(cast(ShiftExpression) n);
+ else if (cast(SliceExpression) n) visit(cast(SliceExpression) n);
+ else if (cast(TemplateMixinExpression) n) visit(cast(TemplateMixinExpression) n);
+ else if (cast(TernaryExpression) n) visit(cast(TernaryExpression) n);
+ else if (cast(TraitsExpression) n) visit(cast(TraitsExpression) n);
+ else if (cast(TypeidExpression) n) visit(cast(TypeidExpression) n);
+ else if (cast(TypeofExpression) n) visit(cast(TypeofExpression) n);
+ else if (cast(UnaryExpression) n) visit(cast(UnaryExpression) n);
+ else if (cast(XorExpression) n) visit(cast(XorExpression) n);
+ }
+
/** */ void visit(AddExpression addExpression) { addExpression.accept(this); }
/** */ void visit(AliasDeclaration aliasDeclaration) { aliasDeclaration.accept(this); }
/** */ void visit(AliasInitializer aliasInitializer) { aliasInitializer.accept(this); }
@@ -104,7 +156,6 @@ public:
/** */ void visit(EponymousTemplateDeclaration eponymousTemplateDeclaration) { eponymousTemplateDeclaration.accept(this); }
/** */ void visit(EqualExpression equalExpression) { equalExpression.accept(this); }
/** */ void visit(Expression expression) { expression.accept(this); }
- /** */ void visit(ExpressionNode expressionNode) { expressionNode.accept(this); }
/** */ void visit(ExpressionStatement expressionStatement) { expressionStatement.accept(this); }
/** */ void visit(FinalSwitchStatement finalSwitchStatement) { finalSwitchStatement.accept(this); }
/** */ void visit(Finally finally_) { finally_.accept(this); }
@@ -234,10 +285,11 @@ public:
interface ASTNode
{
+public:
/** */ void accept(ASTVisitor visitor);
}
-immutable string DEFAULT_ACCEPT = q{void accept(ASTVisitor visitor) {}};
+immutable string DEFAULT_ACCEPT = q{override void accept(ASTVisitor visitor) {}};
template visitIfNotNull(fields ...)
{
@@ -259,19 +311,28 @@ template visitIfNotNull(fields ...)
}
}
-abstract class ExpressionNode : ASTNode {}
+abstract class ExpressionNode : ASTNode
+{
+public:
+ override void accept(ASTVisitor visitor)
+ {
+ assert (false);
+ }
+}
mixin template BinaryExpressionBody()
{
ExpressionNode left;
ExpressionNode right;
+ size_t line;
+ size_t column;
}
///
class AddExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -283,7 +344,7 @@ public:
class AliasDeclaration : ASTNode
{
public:
- void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, name, initializers));
}
@@ -332,7 +393,7 @@ public:
class AndAndExpression : ExpressionNode
{
public:
- void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -343,7 +404,7 @@ public:
class AndExpression : ExpressionNode
{
public:
- void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -566,7 +627,7 @@ public:
class AssertExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assertion, message));
}
@@ -578,7 +639,7 @@ public:
class AssignExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(ternaryExpression, assignExpression));
}
@@ -816,7 +877,7 @@ public:
class CmpExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(shiftExpression, equalExpression,
identityExpression, relExpression, inExpression));
@@ -1031,11 +1092,13 @@ public:
class DeleteExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
/** */ UnaryExpression unaryExpression;
+ /** */ size_t line;
+ /** */ size_t column;
}
///
@@ -1151,7 +1214,7 @@ public:
class EqualExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1163,7 +1226,7 @@ public:
class Expression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(items));
}
@@ -1293,7 +1356,7 @@ public:
class FunctionCallExpression : ExpressionNode
{
public:
- void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, arguments, templateArguments));
}
@@ -1306,7 +1369,7 @@ public:
class FunctionCallStatement : ASTNode
{
public:
- void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(functionCallExpression));
}
@@ -1338,7 +1401,7 @@ public:
class FunctionLiteralExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, parameters, functionAttributes,
functionBody));
@@ -1413,7 +1476,7 @@ public:
class IdentityExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1478,7 +1541,7 @@ public:
class ImportExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assignExpression));
}
@@ -1489,7 +1552,7 @@ public:
class IndexExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, argumentList));
}
@@ -1501,7 +1564,7 @@ public:
class InExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1575,7 +1638,7 @@ public:
class IsExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, identifier, typeSpecialization,
templateParameterList));
@@ -1626,7 +1689,7 @@ public:
class LambdaExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, parameters, functionAttributes,
assignExpression));
@@ -1689,7 +1752,7 @@ public:
class MixinExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(assignExpression));
}
@@ -1748,7 +1811,7 @@ public:
class MulExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1760,7 +1823,7 @@ public:
class NewAnonClassExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(allocatorArguments, constructorArguments,
baseClassList, structBody));
@@ -1775,7 +1838,7 @@ public:
class NewExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(newAnonClassExpression, type, arguments,
assignExpression));
@@ -1863,7 +1926,7 @@ public:
class OrExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1874,7 +1937,7 @@ public:
class OrOrExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1937,7 +2000,7 @@ public:
class PostIncDecExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
@@ -1949,7 +2012,7 @@ public:
class PowExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -1971,7 +2034,7 @@ public:
class PragmaExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, argumentList));
}
@@ -1983,7 +2046,7 @@ public:
class PreIncDecExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression));
}
@@ -1995,7 +2058,7 @@ public:
class PrimaryExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(basicType, primary, typeofExpression,
typeidExpression, arrayLiteral, assocArrayLiteral, expression,
@@ -2035,7 +2098,7 @@ public:
class RelExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -2096,7 +2159,7 @@ public:
class ShiftExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
@@ -2120,7 +2183,7 @@ public:
class SliceExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(unaryExpression, lower, upper));
}
@@ -2409,7 +2472,7 @@ public:
class TemplateMixinExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, templateArguments, mixinTemplateName));
}
@@ -2534,7 +2597,7 @@ public:
class TernaryExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(orOrExpression, expression, ternaryExpression));
}
@@ -2558,7 +2621,7 @@ public:
class TraitsExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(identifier, templateArgumentList));
}
@@ -2647,7 +2710,7 @@ public:
class TypeidExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(type, expression));
}
@@ -2659,7 +2722,7 @@ public:
class TypeofExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(expression, return_));
}
@@ -2671,7 +2734,7 @@ public:
class UnaryExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
// TODO prefix, postfix, unary
mixin (visitIfNotNull!(primaryExpression, newExpression,
@@ -2803,7 +2866,7 @@ public:
class XorExpression : ExpressionNode
{
public:
- /+override+/ void accept(ASTVisitor visitor)
+ override void accept(ASTVisitor visitor)
{
mixin (visitIfNotNull!(left, right));
}
diff --git a/stdx/d/parser.d b/stdx/d/parser.d
index 0c6aea1..8164ef3 100644
--- a/stdx/d/parser.d
+++ b/stdx/d/parser.d
@@ -1874,6 +1874,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
{
mixin(traceEnterAndExit!(__FUNCTION__));
auto node = new DeleteExpression;
+ node.line = current.line;
+ node.column = current.column;
if (expect(tok!"delete") is null) return null;
node.unaryExpression = parseUnaryExpression();
return node;
@@ -3990,6 +3992,7 @@ q{(int a, ...)
*/
PragmaDeclaration parsePragmaDeclaration()
{
+ mixin (traceEnterAndExit!(__FUNCTION__));
auto node = new PragmaDeclaration;
node.pragmaExpression = parsePragmaExpression();
expect(tok!";");
@@ -4005,6 +4008,7 @@ q{(int a, ...)
*/
PragmaExpression parsePragmaExpression()
{
+ mixin (traceEnterAndExit!(__FUNCTION__));
auto node = new PragmaExpression;
expect(tok!"pragma");
expect(tok!"(");
@@ -4264,8 +4268,9 @@ q{(int a, ...)
* | $(LITERAL '!<=')
* ;)
*/
- ExpressionNode parseRelExpression(ExpressionNode shift = null)
+ ExpressionNode parseRelExpression(ExpressionNode shift)
{
+ mixin (traceEnterAndExit!(__FUNCTION__));
return parseLeftAssocBinaryExpression!(RelExpression, ShiftExpression,
tok!"<", tok!"<=", tok!">", tok!">=", tok!"!<>=", tok!"!<>",
tok!"<>", tok!"<>=", tok!"!>", tok!"!>=", tok!"!>=", tok!"!<",
@@ -6238,7 +6243,11 @@ protected:
{
auto n = new ExpressionType;
static if (__traits(hasMember, ExpressionType, "operator"))
+ {
+ n.line = current.line;
+ n.column = current.column;
n.operator = advance().type;
+ }
else
advance();
n.left = node;
diff --git a/stdx/lexer.d b/stdx/lexer.d
index 850253e..c013d2f 100644
--- a/stdx/lexer.d
+++ b/stdx/lexer.d
@@ -1,8 +1,99 @@
// Written in the D programming language
/**
+ * $(H2 Summary)
* This module contains a range-based _lexer generator.
*
+ * $(H2 Overview)
+ * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
+ * several helper templates for generating such things as token identifiers.
+ *
+ * To write a _lexer using this API:
+ * $(OL
+ * $(LI Create the string array constants for your language.
+ * $(UL
+ * $(LI $(LINK2 #.StringConstants, String Constants))
+ * ))
+ * $(LI Create aliases for the various token and token identifier types
+ * specific to your language.
+ * $(UL
+ * $(LI $(LREF TokenIdType))
+ * $(LI $(LREF tokenStringRepresentation))
+ * $(LI $(LREF TokenStructure))
+ * $(LI $(LREF TokenId))
+ * ))
+ * $(LI Create a struct that mixes in the Lexer template mixin and
+ * implements the necessary functions.
+ * $(UL
+ * $(LI $(LREF Lexer))
+ * ))
+ * )
+ * Examples:
+ * $(UL
+ * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
+ * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
+ * )
+ * $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
+ * $(DL
+ * $(DT $(B staticTokens))
+ * $(DD A listing of the tokens whose exact value never changes and which cannot
+ * possibly be a token handled by the default token lexing function. The
+ * most common example of this kind of token is an operator such as
+ * $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
+ * $(DT $(B dynamicTokens))
+ * $(DD A listing of tokens whose value is variable, such as whitespace,
+ * identifiers, number literals, and string literals.)
+ * $(DT $(B possibleDefaultTokens))
+ * $(DD A listing of tokens that could possibly be one of the tokens handled by
+ * the default token handling function. A common example of this is
+ * a keyword such as $(D_STRING "for"), which looks like the beginning of
+ * the identifier $(D_STRING "fortunate"). isSeparating is called to
+ * determine if the character after the $(D_STRING 'r') separates the
+ * identifier, indicating that the token is $(D_STRING "for"), or if lexing
+ * should be turned over to the defaultTokenFunction.)
+ * $(DT $(B tokenHandlers))
+ * $(DD A mapping of prefixes to custom token handling function names. The
+ * generated _lexer will search for the even-index elements of this array,
+ * and then call the function whose name is the element immedately after the
+ * even-indexed element. This is used for lexing complex tokens whose prefix
+ * is fixed.)
+ * )
+ *
+ * Here are some example constants for a simple calculator _lexer:
+ * ---
+ * // There are a near infinite number of valid number literals, so numbers are
+ * // dynamic tokens.
+ * enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
+ *
+ * // The operators are always the same, and cannot start a numberLiteral, so
+ * // they are staticTokens
+ * enum string[] staticTokens = ["-", "+", "*", "/"];
+ *
+ * // In this simple example there are no keywords or other tokens that could
+ * // look like dynamic tokens, so this is blank.
+ * enum string[] possibleDefaultTokens = [];
+ *
+ * // If any whitespace character or digit is encountered, pass lexing over to
+ * // our custom handler functions. These will be demonstrated in an example
+ * // later on.
+ * enum string[] tokenHandlers = [
+ * "0", "lexNumber",
+ * "1", "lexNumber",
+ * "2", "lexNumber",
+ * "3", "lexNumber",
+ * "4", "lexNumber",
+ * "5", "lexNumber",
+ * "6", "lexNumber",
+ * "7", "lexNumber",
+ * "8", "lexNumber",
+ * "9", "lexNumber",
+ * " ", "lexWhitespace",
+ * "\n", "lexWhitespace",
+ * "\t", "lexWhitespace",
+ * "\r", "lexWhitespace"
+ * ];
+ * ---
+ *
* Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
@@ -16,7 +107,12 @@ module stdx.lexer;
* unsigned integral type that is able to hold the value
* staticTokens.length + dynamicTokens.length. For example if there are 20
* static tokens, 30 dynamic tokens, and 10 possible default tokens, this
- * template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.
+ * template will alias itself to ubyte, as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
+ * Examples:
+ * ---
+ * // In our calculator example this means that IdType is an alias for ubyte.
+ * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
+ * ---
*/
template TokenIdType(alias staticTokens, alias dynamicTokens,
alias possibleDefaultTokens)
@@ -32,7 +128,15 @@ template TokenIdType(alias staticTokens, alias dynamicTokens,
}
/**
- * Looks up the string representation of the given token type.
+ * Looks up the string representation of the given token type. This is the
+ * opposite of the function of the TokenId template.
+ * Params: type = the token type identifier
+ * Examples:
+ * ---
+ * alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
+ * assert (str(tok!"*") == "*");
+ * ---
+ * See_also: $(LREF TokenId)
*/
string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property
{
@@ -57,18 +161,18 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
* valid token type identifier)
* )
* In all cases this template will alias itself to a constant of type IdType.
+ * This template will fail at compile time if $(D_PARAM symbol) is not one of
+ * the staticTokens, dynamicTokens, or possibleDefaultTokens.
* Examples:
* ---
- * enum string[] staticTokens = ["+", "-", "*", "/"];
- * enum string[] dynamicTokens = ["number"];
- * enum string[] possibleDefaultTokens = [];
- * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
* template tok(string symbol)
* {
* alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
* possibleDefaultTokens, symbol);
* }
+ * // num and plus are of type ubyte.
* IdType plus = tok!"+";
+ * IdType num = tok!"numberLiteral";
* ---
*/
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
@@ -118,35 +222,49 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens,
/**
* The token that is returned by the lexer.
* Params:
- * IDType = The D type of the "type" token type field.
+ * IdType = The D type of the "type" token type field.
* extraFields = A string containing D code for any extra fields that should
* be included in the token structure body. This string is passed
* directly to a mixin statement.
+ * Examples:
+ * ---
+ * // No extra struct fields are desired in this example, so leave it blank.
+ * alias Token = TokenStructure!(IdType, "");
+ * Token minusToken = Token(tok!"-");
+ * ---
*/
-struct TokenStructure(IDType, string extraFields = "")
+struct TokenStructure(IdType, string extraFields = "")
{
public:
/**
* == overload for the the token type.
*/
- bool opEquals(IDType type) const pure nothrow @safe
+ bool opEquals(IdType type) const pure nothrow @safe
{
return this.type == type;
}
/**
- *
+ * Constructs a token from a token type.
+ * Params: type = the token type
*/
- this(IDType type)
+ this(IdType type)
{
this.type = type;
}
/**
- *
+ * Constructs a token.
+ * Params:
+ * type = the token type
+ * text = the text of the token, which may be null
+ * line = the line number at which this token occurs
+ * column = the column number at which this token occurs
+ * index = the byte offset from the beginning of the input at which this
+ * token occurs
*/
- this(IDType type, string text, size_t line, size_t column, size_t index)
+ this(IdType type, string text, size_t line, size_t column, size_t index)
{
this.text = text;
this.line = line;
@@ -156,39 +274,105 @@ public:
}
/**
- *
+ * The _text of the token.
*/
string text;
/**
- *
+ * The line number at which this token occurs.
*/
size_t line;
/**
- *
+ * The column number at which this token occurs.
*/
size_t column;
/**
- *
+ * The byte offset from the beginning of the input at which this token
+ * occurs.
*/
size_t index;
/**
- *
+ * The token type.
*/
- IDType type;
+ IdType type;
mixin (extraFields);
}
+/**
+ * The implementation of the _lexer is contained within this mixin template.
+ * To use it, this template should be mixed in to a struct that represents the
+ * _lexer for your language. This struct should implement the following methods:
+ * $(UL
+ * $(LI popFront, which should call this mixin's _popFront() and
+ * additionally perform any token filtering or shuffling you deem
+ * necessary. For example, you can implement popFront to skip comment or
+ * whitespace tokens.)
+ * $(LI A function that serves as the default token lexing function. For
+ * most languages this will be the identifier lexing function.)
+ * $(LI A function that is able to determine if an identifier/keyword has
+ * come to an end. This function must return $(D_KEYWORD bool) and take
+ * a single $(D_KEYWORD size_t) argument representing the number of
+ * bytes to skip over before looking for a separating character.)
+ * $(LI Any functions referred to in the tokenHandlers template parameter.
+ * These functions must be marked $(D_KEYWORD pure nothrow), take no
+ * arguments, and return a token)
+ * $(LI A constructor that initializes the range field as well as calls
+ * popFront() exactly once (to initialize the _front field).)
+ * )
+ * Examples:
+ * ---
+ * struct CalculatorLexer
+ * {
+ * mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
+ * staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
+ *
+ * this (ubyte[] bytes)
+ * {
+ * this.range = LexerRange(bytes);
+ * popFront();
+ * }
+ *
+ * void popFront() pure
+ * {
+ * _popFront();
+ * }
+ *
+ * Token lexNumber() pure nothrow @safe
+ * {
+ * ...
+ * }
+ *
+ * Token lexWhitespace() pure nothrow @safe
+ * {
+ * ...
+ * }
+ *
+ * Token defaultTokenFunction() pure nothrow @safe
+ * {
+ * // There is no default token in the example calculator language, so
+ * // this is always an error.
+ * range.popFront();
+ * return Token(tok!"");
+ * }
+ *
+ * bool isSeparating(size_t offset) pure nothrow @safe
+ * {
+ * // For this example language, always return true.
+ * return true;
+ * }
+ * }
+ * ---
+ */
mixin template Lexer(IDType, Token, alias defaultTokenFunction,
alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
- alias pseudoTokenHandlers, alias possibleDefaultTokens)
+ alias tokenHandlers, alias possibleDefaultTokens)
{
- static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must"
+ static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
~ " have a corresponding handler function name.");
static string generateMask(const ubyte[] arr)
@@ -214,7 +398,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
import std.string;
import std.range;
- string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2));
+ string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2));
string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
string code;
for (size_t i = 0; i < allTokens.length; i++)
@@ -240,7 +424,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
if (pseudoTokens.countUntil(tokens[0]) >= 0)
{
return " return "
- ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+ ~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
~ "();\n";
}
else if (staticTokens.countUntil(tokens[0]) >= 0)
@@ -251,7 +435,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
else if (pseudoTokens.countUntil(tokens[0]) >= 0)
{
return " return "
- ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+ ~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
~ "();\n";
}
}
@@ -271,14 +455,14 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
if (token.length <= 8)
{
code ~= " return "
- ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
+ ~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
~ "();\n";
}
else
{
code ~= " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n";
code ~= " return "
- ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
+ ~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
~ "();\n";
}
}
@@ -325,16 +509,23 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
return code;
}
+ /**
+ * Implements the range primitive front().
+ */
ref const(Token) front() pure nothrow const @property
{
return _front;
}
+
void _popFront() pure
{
_front = advance();
}
+ /**
+ * Implements the range primitive empty().
+ */
bool empty() pure const nothrow @property
{
return _front.type == tok!"\0";
@@ -359,9 +550,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
return retVal;
}
- /**
- * This only exists because the real array() can't be called at compile-time
- */
+ // This only exists because the real array() can't be called at compile-time
static string[] stupidToArray(R)(R range)
{
string[] retVal;
@@ -397,13 +586,30 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
}
}
+ /**
+ * The lexer input.
+ */
LexerRange range;
+
+ /**
+ * The token that is currently at the front of the range.
+ */
Token _front;
}
+/**
+ * Range structure that wraps the _lexer's input.
+ */
struct LexerRange
{
+ /**
+ * Params:
+ * bytes = the _lexer input
+ * index = the initial offset from the beginning of $(D_PARAM bytes)
+ * column = the initial column number
+ * line = the initial line number
+ */
this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
{
this.bytes = bytes;
@@ -412,31 +618,52 @@ struct LexerRange
this.line = line;
}
+ /**
+ * Returns: a mark at the current position that can then be used with slice.
+ */
size_t mark() const nothrow pure @safe
{
return index;
}
+ /**
+ * Sets the range to the given position.
+ * Params: m = the position to seek to
+ */
void seek(size_t m) nothrow pure @safe
{
index = m;
}
+ /**
+ * Returns a slice of the input byte array between the given mark and the
+ * current position.
+ * Params: m = the beginning index of the slice to return
+ */
const(ubyte)[] slice(size_t m) const nothrow pure @safe
{
return bytes[m .. index];
}
+ /**
+ * Implements the range primitive _empty.
+ */
bool empty() const nothrow pure @safe
{
return index >= bytes.length;
}
+ /**
+ * Implements the range primitive _front.
+ */
ubyte front() const nothrow pure @safe
{
return bytes[index];
}
+ /**
+ * Returns: the current item as well as the $(D_PARAM p) items ahead of it.
+ */
const(ubyte)[] peek(size_t p) const nothrow pure @safe
{
return index + p + 1 > bytes.length
@@ -444,48 +671,79 @@ struct LexerRange
: bytes[index .. index + p + 1];
}
+ /**
+ * Returns: the byte at $(D_PARAM offset) bytes past the current position.
+ */
ubyte peekAt(size_t offset) const nothrow pure @safe
{
return bytes[index + offset];
}
+ /**
+ * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead.
+ */
bool canPeek(size_t p) const nothrow pure @safe
{
return index + p < bytes.length;
}
+ /**
+ * Implements the range primitive _popFront.
+ */
void popFront() pure nothrow @safe
{
index++;
column++;
}
+ /**
+ * Implements the algorithm _popFrontN more efficiently.
+ */
void popFrontN(size_t n) pure nothrow @safe
{
index += n;
+ column += n;
}
+ /**
+ * Increments the range's line number and resets the column counter.
+ */
void incrementLine() pure nothrow @safe
{
column = 1;
line++;
}
+ /**
+ * The input _bytes.
+ */
const(ubyte)[] bytes;
+
+ /**
+ * The range's current position.
+ */
size_t index;
+
+ /**
+ * The current _column number.
+ */
size_t column;
+
+ /**
+ * The current _line number.
+ */
size_t line;
}
/**
- * The string cache should be used within lexer implementations for several
- * reasons:
- * $(UL
- * $(LI Reducing memory consumption.)
- * $(LI Increasing performance in token comparisons)
- * $(LI Correctly creating immutable token text if the lexing source is not
- * immutable)
- * )
+ * The string cache implements a map/set for strings. Placing a string in the
+ * cache returns an identifier that can be used to instantly access the stored
+ * string. It is then possible to simply compare these indexes instead of
+ * performing full string comparisons when comparing the string content of
+ * dynamic tokens. The string cache also handles its own memory, so that lexers
+ * given mutable ubyte[] input can still have immutable string fields in their tokens.
+ * Because the string cache also performs de-duplication it is possible to
+ * drastically reduce the memory usage of a lexer.
*/
struct StringCache
{
@@ -493,7 +751,10 @@ public:
@disable this();
- this(size_t bucketCount = defaultBucketCount)
+ /**
+ * Params: bucketCount = the initial number of buckets.
+ */
+ this(size_t bucketCount)
{
buckets = new Item*[bucketCount];
}
@@ -512,6 +773,9 @@ public:
return get(cache(bytes));
}
+ /**
+ * Equivalent to calling cache() and get().
+ */
string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe
{
return get(cache(bytes, hash));
@@ -536,6 +800,11 @@ public:
return cache(bytes, hash);
}
+ /**
+ * Caches a string as above, but uses the given hash code instead of
+ * calculating one itself. Using this alongside hashStep() can reduce the
+ * amount of work necessary when lexing dynamic tokens.
+ */
size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
in
{
@@ -583,11 +852,21 @@ public:
writeln("rehashes: ", rehashCount);
}
+ /**
+ * Incremental hashing.
+ * Params:
+ * b = the byte to add to the hash
+ * h = the hash that has been calculated so far
+ * Returns: the new hash code for the string.
+ */
static uint hashStep(ubyte b, uint h) pure nothrow @safe
{
return (h ^ sbox[b]) * 3;
}
+ /**
+ * The default bucket count for the string cache.
+ */
static enum defaultBucketCount = 2048;
private:
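
To tie the documented pieces together, here is a rough sketch of what the lexNumber handler elided with `...` in the CalculatorLexer example above could look like, assuming the IdType, Token, and tok aliases from the calculator examples. It is written as a free function taking the range and cache as parameters purely for illustration; inside the Lexer mixin a handler takes no arguments and uses the struct's own range field, and the incremental-hash optimization described for hashStep() and cache(bytes, hash) is left out.

---
Token lexNumber(ref LexerRange range, ref StringCache cache) pure nothrow @safe
{
    // Remember where the token starts so it can be sliced out afterwards.
    immutable size_t mark = range.mark();
    immutable size_t line = range.line;
    immutable size_t column = range.column;

    // Consume a run of decimal digits.
    while (!range.empty && range.front >= '0' && range.front <= '9')
        range.popFront();

    // cacheGet stores the byte slice in the StringCache (de-duplicating it)
    // and returns an immutable string for the token's text field.
    return Token(tok!"numberLiteral", cache.cacheGet(range.slice(mark)),
        line, column, mark);
}

// Note: StringCache cannot be default-constructed; create one explicitly with
// auto cache = StringCache(StringCache.defaultBucketCount);
---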
diff --git a/stdx/lexer.html b/stdx/lexer.html
new file mode 100644
index 0000000..8881aa0
--- /dev/null
+++ b/stdx/lexer.html
@@ -0,0 +1,483 @@
[... 483 lines of DDoc-generated HTML documentation for stdx.lexer omitted; the rendered content mirrors the documentation comments added to stdx/lexer.d above ...]
diff --git a/stdx/lexer.o b/stdx/lexer.o
new file mode 100644
index 0000000..162acdb
Binary files /dev/null and b/stdx/lexer.o differ