diff --git a/analysis/del.d b/analysis/del.d new file mode 100644 index 0000000..8cd3646 --- /dev/null +++ b/analysis/del.d @@ -0,0 +1,29 @@ +// Copyright Brian Schott (Sir Alaran) 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module analysis.del; + +import stdx.d.ast; +import stdx.d.lexer; +import analysis.base; + +/** + * Checks for use of the deprecated "delete" keyword + */ +class DeleteCheck : BaseAnalyzer +{ + alias visit = BaseAnalyzer.visit; + + this(string fileName) + { + super(fileName); + } + + override void visit(DeleteExpression d) + { + addErrorMessage(d.line, d.column, "Avoid using the deprecated delete keyword"); + d.accept(this); + } +} diff --git a/analysis/fish.d b/analysis/fish.d new file mode 100644 index 0000000..e1790d2 --- /dev/null +++ b/analysis/fish.d @@ -0,0 +1,38 @@ +// Copyright Brian Schott (Sir Alaran) 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module analysis.fish; + +import stdx.d.ast; +import stdx.d.lexer; +import analysis.base; + +/** + * Checks for use of the deprecated floating point comparison operators. + */ +class FloatOperatorCheck : BaseAnalyzer +{ + alias visit = BaseAnalyzer.visit; + + this(string fileName) + { + super(fileName); + } + + override void visit(RelExpression r) + { + if (r.operator == tok!"<>" + || r.operator == tok!"!<>" + || r.operator == tok!"!>" + || r.operator == tok!"!<" + || r.operator == tok!"!<>=" + || r.operator == tok!"!>=" + || r.operator == tok!"!<=") + { + addErrorMessage(r.line, r.column, "Avoid using the deprecated floating-point operators"); + } + r.accept(this); + } +} diff --git a/analysis/run.d b/analysis/run.d index 19fa961..d55a7d1 100644 --- a/analysis/run.d +++ b/analysis/run.d @@ -15,6 +15,8 @@ import analysis.base; import analysis.style; import analysis.enumarrayliteral; import analysis.pokemon; +import analysis.del; +import analysis.fish; void messageFunction(string fileName, size_t line, size_t column, string message, bool isError) @@ -63,8 +65,14 @@ void analyze(File output, string[] fileNames, bool staticAnalyze = true) auto pokemon = new PokemonExceptionCheck(fileName); pokemon.visit(m); + auto del = new DeleteCheck(fileName); + del.visit(m); + + auto fish = new FloatOperatorCheck(fileName); + fish.visit(m); + foreach (message; sort(chain(enums.messages, style.messages, - pokemon.messages).array)) + pokemon.messages, del.messages, fish.messages).array)) { writeln(message); } diff --git a/main.d b/main.d index 4427959..3b3103a 100644 --- a/main.d +++ b/main.d @@ -109,10 +109,11 @@ int main(string[] args) } else if (tokenDump) { + writeln("text blank\tindex\tline\tcolumn\tcomment"); foreach (token; tokens) { - writeln("«", token.text is null ? str(token.type) : token.text, - "» ", token.text !is null, " ", token.index, " ", token.line, " ", token.column, " ", + writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? 
str(token.type) : token.text, + token.text !is null, token.index, token.line, token.column, token.comment); } return 0; diff --git a/stdx/d/ast.d b/stdx/d/ast.d index dc21ae3..7dd5e26 100644 --- a/stdx/d/ast.d +++ b/stdx/d/ast.d @@ -31,6 +31,58 @@ import std.string; abstract class ASTVisitor { public: + + void visit(ExpressionNode n) + { + if (cast(AddExpression) n) visit(cast(AddExpression) n); + else if (cast(AndAndExpression) n) visit(cast(AndAndExpression) n); + else if (cast(AndExpression) n) visit(cast(AndExpression) n); + else if (cast(AsmAddExp) n) visit(cast(AsmAddExp) n); + else if (cast(AsmAndExp) n) visit(cast(AsmAndExp) n); + else if (cast(AsmEqualExp) n) visit(cast(AsmEqualExp) n); + else if (cast(AsmLogAndExp) n) visit(cast(AsmLogAndExp) n); + else if (cast(AsmLogOrExp) n) visit(cast(AsmLogOrExp) n); + else if (cast(AsmMulExp) n) visit(cast(AsmMulExp) n); + else if (cast(AsmOrExp) n) visit(cast(AsmOrExp) n); + else if (cast(AsmRelExp) n) visit(cast(AsmRelExp) n); + else if (cast(AsmShiftExp) n) visit(cast(AsmShiftExp) n); + else if (cast(AssertExpression) n) visit(cast(AssertExpression) n); + else if (cast(AssignExpression) n) visit(cast(AssignExpression) n); + else if (cast(CmpExpression) n) visit(cast(CmpExpression) n); + else if (cast(DeleteExpression) n) visit(cast(DeleteExpression) n); + else if (cast(EqualExpression) n) visit(cast(EqualExpression) n); + else if (cast(Expression) n) visit(cast(Expression) n); + else if (cast(FunctionCallExpression) n) visit(cast(FunctionCallExpression) n); + else if (cast(FunctionLiteralExpression) n) visit(cast(FunctionLiteralExpression) n); + else if (cast(IdentityExpression) n) visit(cast(IdentityExpression) n); + else if (cast(ImportExpression) n) visit(cast(ImportExpression) n); + else if (cast(IndexExpression) n) visit(cast(IndexExpression) n); + else if (cast(InExpression) n) visit(cast(InExpression) n); + else if (cast(IsExpression) n) visit(cast(IsExpression) n); + else if (cast(LambdaExpression) n) visit(cast(LambdaExpression) n); + else if (cast(MixinExpression) n) visit(cast(MixinExpression) n); + else if (cast(MulExpression) n) visit(cast(MulExpression) n); + else if (cast(NewAnonClassExpression) n) visit(cast(NewAnonClassExpression) n); + else if (cast(NewExpression) n) visit(cast(NewExpression) n); + else if (cast(OrExpression) n) visit(cast(OrExpression) n); + else if (cast(OrOrExpression) n) visit(cast(OrOrExpression) n); + else if (cast(PostIncDecExpression) n) visit(cast(PostIncDecExpression) n); + else if (cast(PowExpression) n) visit(cast(PowExpression) n); + else if (cast(PragmaExpression) n) visit(cast(PragmaExpression) n); + else if (cast(PreIncDecExpression) n) visit(cast(PreIncDecExpression) n); + else if (cast(PrimaryExpression) n) visit(cast(PrimaryExpression) n); + else if (cast(RelExpression) n) visit(cast(RelExpression) n); + else if (cast(ShiftExpression) n) visit(cast(ShiftExpression) n); + else if (cast(SliceExpression) n) visit(cast(SliceExpression) n); + else if (cast(TemplateMixinExpression) n) visit(cast(TemplateMixinExpression) n); + else if (cast(TernaryExpression) n) visit(cast(TernaryExpression) n); + else if (cast(TraitsExpression) n) visit(cast(TraitsExpression) n); + else if (cast(TypeidExpression) n) visit(cast(TypeidExpression) n); + else if (cast(TypeofExpression) n) visit(cast(TypeofExpression) n); + else if (cast(UnaryExpression) n) visit(cast(UnaryExpression) n); + else if (cast(XorExpression) n) visit(cast(XorExpression) n); + } + /** */ void visit(AddExpression addExpression) { 
addExpression.accept(this); } /** */ void visit(AliasDeclaration aliasDeclaration) { aliasDeclaration.accept(this); } /** */ void visit(AliasInitializer aliasInitializer) { aliasInitializer.accept(this); } @@ -104,7 +156,6 @@ public: /** */ void visit(EponymousTemplateDeclaration eponymousTemplateDeclaration) { eponymousTemplateDeclaration.accept(this); } /** */ void visit(EqualExpression equalExpression) { equalExpression.accept(this); } /** */ void visit(Expression expression) { expression.accept(this); } - /** */ void visit(ExpressionNode expressionNode) { expressionNode.accept(this); } /** */ void visit(ExpressionStatement expressionStatement) { expressionStatement.accept(this); } /** */ void visit(FinalSwitchStatement finalSwitchStatement) { finalSwitchStatement.accept(this); } /** */ void visit(Finally finally_) { finally_.accept(this); } @@ -234,10 +285,11 @@ public: interface ASTNode { +public: /** */ void accept(ASTVisitor visitor); } -immutable string DEFAULT_ACCEPT = q{void accept(ASTVisitor visitor) {}}; +immutable string DEFAULT_ACCEPT = q{override void accept(ASTVisitor visitor) {}}; template visitIfNotNull(fields ...) { @@ -259,19 +311,28 @@ template visitIfNotNull(fields ...) } } -abstract class ExpressionNode : ASTNode {} +abstract class ExpressionNode : ASTNode +{ +public: + override void accept(ASTVisitor visitor) + { + assert (false); + } +} mixin template BinaryExpressionBody() { ExpressionNode left; ExpressionNode right; + size_t line; + size_t column; } /// class AddExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -283,7 +344,7 @@ public: class AliasDeclaration : ASTNode { public: - void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(type, name, initializers)); } @@ -332,7 +393,7 @@ public: class AndAndExpression : ExpressionNode { public: - void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -343,7 +404,7 @@ public: class AndExpression : ExpressionNode { public: - void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -566,7 +627,7 @@ public: class AssertExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(assertion, message)); } @@ -578,7 +639,7 @@ public: class AssignExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(ternaryExpression, assignExpression)); } @@ -816,7 +877,7 @@ public: class CmpExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(shiftExpression, equalExpression, identityExpression, relExpression, inExpression)); @@ -1031,11 +1092,13 @@ public: class DeleteExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression)); } /** */ UnaryExpression unaryExpression; + /** */ size_t line; + /** */ size_t column; } /// @@ -1151,7 +1214,7 @@ public: class EqualExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1163,7 +1226,7 @@ public: 
class Expression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(items)); } @@ -1293,7 +1356,7 @@ public: class FunctionCallExpression : ExpressionNode { public: - void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression, arguments, templateArguments)); } @@ -1306,7 +1369,7 @@ public: class FunctionCallStatement : ASTNode { public: - void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(functionCallExpression)); } @@ -1338,7 +1401,7 @@ public: class FunctionLiteralExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(type, parameters, functionAttributes, functionBody)); @@ -1413,7 +1476,7 @@ public: class IdentityExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1478,7 +1541,7 @@ public: class ImportExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(assignExpression)); } @@ -1489,7 +1552,7 @@ public: class IndexExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression, argumentList)); } @@ -1501,7 +1564,7 @@ public: class InExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1575,7 +1638,7 @@ public: class IsExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(type, identifier, typeSpecialization, templateParameterList)); @@ -1626,7 +1689,7 @@ public: class LambdaExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(identifier, parameters, functionAttributes, assignExpression)); @@ -1689,7 +1752,7 @@ public: class MixinExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(assignExpression)); } @@ -1748,7 +1811,7 @@ public: class MulExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1760,7 +1823,7 @@ public: class NewAnonClassExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(allocatorArguments, constructorArguments, baseClassList, structBody)); @@ -1775,7 +1838,7 @@ public: class NewExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(newAnonClassExpression, type, arguments, assignExpression)); @@ -1863,7 +1926,7 @@ public: class OrExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1874,7 +1937,7 @@ public: class OrOrExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void 
accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1937,7 +2000,7 @@ public: class PostIncDecExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression)); } @@ -1949,7 +2012,7 @@ public: class PowExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -1971,7 +2034,7 @@ public: class PragmaExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(identifier, argumentList)); } @@ -1983,7 +2046,7 @@ public: class PreIncDecExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression)); } @@ -1995,7 +2058,7 @@ public: class PrimaryExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(basicType, primary, typeofExpression, typeidExpression, arrayLiteral, assocArrayLiteral, expression, @@ -2035,7 +2098,7 @@ public: class RelExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -2096,7 +2159,7 @@ public: class ShiftExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } @@ -2120,7 +2183,7 @@ public: class SliceExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(unaryExpression, lower, upper)); } @@ -2409,7 +2472,7 @@ public: class TemplateMixinExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(identifier, templateArguments, mixinTemplateName)); } @@ -2534,7 +2597,7 @@ public: class TernaryExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(orOrExpression, expression, ternaryExpression)); } @@ -2558,7 +2621,7 @@ public: class TraitsExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(identifier, templateArgumentList)); } @@ -2647,7 +2710,7 @@ public: class TypeidExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(type, expression)); } @@ -2659,7 +2722,7 @@ public: class TypeofExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(expression, return_)); } @@ -2671,7 +2734,7 @@ public: class UnaryExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { // TODO prefix, postfix, unary mixin (visitIfNotNull!(primaryExpression, newExpression, @@ -2803,7 +2866,7 @@ public: class XorExpression : ExpressionNode { public: - /+override+/ void accept(ASTVisitor visitor) + override void accept(ASTVisitor visitor) { mixin (visitIfNotNull!(left, right)); } diff --git 
a/stdx/d/parser.d b/stdx/d/parser.d index 0c6aea1..8164ef3 --- a/stdx/d/parser.d +++ b/stdx/d/parser.d @@ -1874,6 +1874,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new DeleteExpression; + node.line = current.line; + node.column = current.column; if (expect(tok!"delete") is null) return null; node.unaryExpression = parseUnaryExpression(); return node; @@ -3990,6 +3992,7 @@ q{(int a, ...) */ PragmaDeclaration parsePragmaDeclaration() { + mixin (traceEnterAndExit!(__FUNCTION__)); auto node = new PragmaDeclaration; node.pragmaExpression = parsePragmaExpression(); expect(tok!";"); @@ -4005,6 +4008,7 @@ q{(int a, ...) */ PragmaExpression parsePragmaExpression() { + mixin (traceEnterAndExit!(__FUNCTION__)); auto node = new PragmaExpression; expect(tok!"pragma"); expect(tok!"("); @@ -4264,8 +4268,9 @@ q{(int a, ...) * | $(LITERAL '!<=') * ;) */ - ExpressionNode parseRelExpression(ExpressionNode shift = null) + ExpressionNode parseRelExpression(ExpressionNode shift) { + mixin (traceEnterAndExit!(__FUNCTION__)); return parseLeftAssocBinaryExpression!(RelExpression, ShiftExpression, tok!"<", tok!"<=", tok!">", tok!">=", tok!"!<>=", tok!"!<>", tok!"<>", tok!"<>=", tok!"!>", tok!"!>=", tok!"!>=", tok!"!<", @@ -6238,7 +6243,11 @@ protected: { auto n = new ExpressionType; static if (__traits(hasMember, ExpressionType, "operator")) + { + n.line = current.line; + n.column = current.column; n.operator = advance().type; + } else advance(); n.left = node; diff --git a/stdx/lexer.d b/stdx/lexer.d index 850253e..c013d2f 100--- a/stdx/lexer.d +++ b/stdx/lexer.d @@ -1,8 +1,99 @@ // Written in the D programming language /** + * $(H2 Summary) * This module contains a range-based _lexer generator. * + * $(H2 Overview) + * The _lexer generator consists of a template mixin, $(LREF Lexer), along with + * several helper templates for generating such things as token identifiers. + * + * To write a _lexer using this API: + * $(OL + * $(LI Create the string array constants for your language. + * $(UL + * $(LI $(LINK2 #.StringConstants, String Constants)) + * )) + * $(LI Create aliases for the various token and token identifier types + * specific to your language. + * $(UL + * $(LI $(LREF TokenIdType)) + * $(LI $(LREF tokenStringRepresentation)) + * $(LI $(LREF TokenStructure)) + * $(LI $(LREF TokenId)) + * )) + * $(LI Create a struct that mixes in the Lexer template mixin and + * implements the necessary functions. + * $(UL + * $(LI $(LREF Lexer)) + * )) + * ) + * Examples: + * $(UL + * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).) + * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).) + * ) + * $(DDOC_ANCHOR StringConstants) $(H2 String Constants) + * $(DL + * $(DT $(B staticTokens)) + * $(DD A listing of the tokens whose exact value never changes and which cannot + * possibly be a token handled by the default token lexing function. The + * most common example of this kind of token is an operator such as + * $(D_STRING "*"), or $(D_STRING "-") in a programming language.) + * $(DT $(B dynamicTokens)) + * $(DD A listing of tokens whose value is variable, such as whitespace, + * identifiers, number literals, and string literals.) + * $(DT $(B possibleDefaultTokens)) + * $(DD A listing of tokens that could possibly be one of the tokens handled by + * the default token handling function. 
A common example of this is + * a keyword such as $(D_STRING "for"), which looks like the beginning of + * the identifier $(D_STRING "fortunate"). isSeparating is called to + * determine if the character after the $(D_STRING 'r') separates the + * identifier, indicating that the token is $(D_STRING "for"), or if lexing + * should be turned over to the defaultTokenFunction.) + * $(DT $(B tokenHandlers)) + * $(DD A mapping of prefixes to custom token handling function names. The + * generated _lexer will search for the even-index elements of this array, + * and then call the function whose name is the element immediately after the + * even-indexed element. This is used for lexing complex tokens whose prefix + * is fixed.) + * ) + * + * Here are some example constants for a simple calculator _lexer: + * --- + * // There are a near infinite number of valid number literals, so numbers are + * // dynamic tokens. + * enum string[] dynamicTokens = ["numberLiteral", "whitespace"]; + * + * // The operators are always the same, and cannot start a numberLiteral, so + * // they are staticTokens + * enum string[] staticTokens = ["-", "+", "*", "/"]; + * + * // In this simple example there are no keywords or other tokens that could + * // look like dynamic tokens, so this is blank. + * enum string[] possibleDefaultTokens = []; + * + * // If any whitespace character or digit is encountered, pass lexing over to + * // our custom handler functions. These will be demonstrated in an example + * // later on. + * enum string[] tokenHandlers = [ + * "0", "lexNumber", + * "1", "lexNumber", + * "2", "lexNumber", + * "3", "lexNumber", + * "4", "lexNumber", + * "5", "lexNumber", + * "6", "lexNumber", + * "7", "lexNumber", + * "8", "lexNumber", + * "9", "lexNumber", + * " ", "lexWhitespace", + * "\n", "lexWhitespace", + * "\t", "lexWhitespace", + * "\r", "lexWhitespace" + * ]; + * --- + * * Copyright: Brian Schott 2013 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0) * Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu @@ -16,7 +107,12 @@ module stdx.lexer; * unsigned integral type that is able to hold the value * staticTokens.length + dynamicTokens.length. For example if there are 20 * static tokens, 30 dynamic tokens, and 10 possible default tokens, this - * template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max. + * template will alias itself to ubyte, as 20 + 30 + 10 < $(D_KEYWORD ubyte).max. + * Examples: + * --- + * // In our calculator example this means that IdType is an alias for ubyte. + * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens); + * --- */ template TokenIdType(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens) @@ -32,7 +128,15 @@ } /** - * Looks up the string representation of the given token type. + * Looks up the string representation of the given token type. This is the + * opposite of the function of the TokenId template. 
+ * Params: type = the token type identifier + * Examples: + * --- + * alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens); + * assert (str(tok!"*") == "*"); + * --- + * See_also: $(LREF TokenId) */ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property { @@ -57,18 +161,18 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens * valid token type identifier) * ) * In all cases this template will alias itself to a constant of type IdType. + * This template will fail at compile time if $(D_PARAM symbol) is not one of + * the staticTokens, dynamicTokens, or possibleDefaultTokens. * Examples: * --- - * enum string[] staticTokens = ["+", "-", "*", "/"]; - * enum string[] dynamicTokens = ["number"]; - * enum string[] possibleDefaultTokens = []; - * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens); * template tok(string symbol) * { * alias tok = TokenId!(IdType, staticTokens, dynamicTokens, * possibleDefaultTokens, symbol); * } + * // num and plus are of type ubyte. * IdType plus = tok!"+"; + * IdType num = tok!"numberLiteral"; * --- */ template TokenId(IdType, alias staticTokens, alias dynamicTokens, @@ -118,35 +222,49 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens, /** * The token that is returned by the lexer. * Params: - * IDType = The D type of the "type" token type field. + * IdType = The D type of the "type" token type field. * extraFields = A string containing D code for any extra fields that should * be included in the token structure body. This string is passed * directly to a mixin statement. + * Examples: + * --- + * // No extra struct fields are desired in this example, so leave it blank. + * alias Token = TokenStructure!(IdType, ""); + * Token minusToken = Token(tok!"-"); + * --- */ -struct TokenStructure(IDType, string extraFields = "") +struct TokenStructure(IdType, string extraFields = "") { public: /** * == overload for the the token type. */ - bool opEquals(IDType type) const pure nothrow @safe + bool opEquals(IdType type) const pure nothrow @safe { return this.type == type; } /** - * + * Constructs a token from a token type. + * Params: type = the token type */ - this(IDType type) + this(IdType type) { this.type = type; } /** - * + * Constructs a token. + * Params: + * type = the token type + * text = the text of the token, which may be null + * line = the line number at which this token occurs + * column = the column number at which this token occurs + * index = the byte offset from the beginning of the input at which this + * token occurs */ - this(IDType type, string text, size_t line, size_t column, size_t index) + this(IdType type, string text, size_t line, size_t column, size_t index) { this.text = text; this.line = line; @@ -156,39 +274,105 @@ public: /** - * + * The _text of the token. */ string text; /** - * + * The line number at which this token occurs. */ size_t line; /** - * + * The column number at which this token occurs. */ size_t column; /** - * + * The byte offset from the beginning of the input at which this token + * occurs. */ size_t index; /** - * + * The token type. */ - IDType type; + IdType type; mixin (extraFields); } +/** + * The implementation of the _lexer is contained within this mixin template. + * To use it, this template should be mixed in to a struct that represents the + * _lexer for your language. 
This struct should implement the following methods: + * $(UL + * $(LI popFront, which should call this mixin's _popFront() and + * additionally perform any token filtering or shuffling you deem + * necessary. For example, you can implement popFront to skip comment + * tokens.) + * $(LI A function that serves as the default token lexing function. For + * most languages this will be the identifier lexing function.) + * $(LI A function that is able to determine if an identifier/keyword has + * come to an end. This function must return $(D_KEYWORD bool) and take + * a single $(D_KEYWORD size_t) argument representing the number of + * bytes to skip over before looking for a separating character.) + * $(LI Any functions referred to in the tokenHandlers template parameter. + * These functions must be marked $(D_KEYWORD pure nothrow), take no + * arguments, and return a token.) + * $(LI A constructor that initializes the range field as well as calls + * popFront() exactly once (to initialize the _front field).) + * ) + * Examples: + * --- + * struct CalculatorLexer + * { + * mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating, + * staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens); + * + * this (ubyte[] bytes) + * { + * this.range = LexerRange(bytes); + * popFront(); + * } + * + * void popFront() pure + * { + * _popFront(); + * } + * + * Token lexNumber() pure nothrow @safe + * { + * ... + * } + * + * Token lexWhitespace() pure nothrow @safe + * { + * ... + * } + * + * Token defaultTokenFunction() pure nothrow @safe + * { + * // There is no default token in the example calculator language, so + * // this is always an error. + * range.popFront(); + * return Token(tok!""); + * } + * + * bool isSeparating(size_t offset) pure nothrow @safe + * { + * // For this example language, always return true. 
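+ *         // offset is the number of bytes to skip over before checking for
+ *         // a separating character; a full implementation would inspect
+ *         // range.peekAt(offset) here rather than always returning true.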
+ * return true; + * } + * } + * --- + */ mixin template Lexer(IDType, Token, alias defaultTokenFunction, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, - alias pseudoTokenHandlers, alias possibleDefaultTokens) + alias tokenHandlers, alias possibleDefaultTokens) { - static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must" + static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must" ~ " have a corresponding handler function name."); static string generateMask(const ubyte[] arr) @@ -214,7 +398,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, import std.string; import std.range; - string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2)); + string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2)); string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq); string code; for (size_t i = 0; i < allTokens.length; i++) @@ -240,7 +424,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, if (pseudoTokens.countUntil(tokens[0]) >= 0) { return " return " - ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1] + ~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1] ~ "();\n"; } else if (staticTokens.countUntil(tokens[0]) >= 0) @@ -251,7 +435,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, else if (pseudoTokens.countUntil(tokens[0]) >= 0) { return " return " - ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1] + ~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1] ~ "();\n"; } } @@ -271,14 +455,14 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, if (token.length <= 8) { code ~= " return " - ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1] + ~ tokenHandlers[tokenHandlers.countUntil(token) + 1] ~ "();\n"; } else { code ~= " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n"; code ~= " return " - ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1] + ~ tokenHandlers[tokenHandlers.countUntil(token) + 1] ~ "();\n"; } } @@ -325,16 +509,23 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, return code; } + /** + * Implements the range primitive front(). + */ ref const(Token) front() pure nothrow const @property { return _front; } + void _popFront() pure { _front = advance(); } + /** + * Implements the range primitive empty(). + */ bool empty() pure const nothrow @property { return _front.type == tok!"\0"; @@ -359,9 +550,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, return retVal; } - /** - * This only exists because the real array() can't be called at compile-time - */ + // This only exists because the real array() can't be called at compile-time static string[] stupidToArray(R)(R range) { string[] retVal; @@ -397,13 +586,30 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, } } + /** + * The lexer input. + */ LexerRange range; + + /** + * The token that is currently at the front of the range. + */ Token _front; } +/** + * Range structure that wraps the _lexer's input. 
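+ *
+ * The sketch below is illustrative only; it assumes LexerRange is driven
+ * directly rather than through a generated _lexer, and shows how the mark,
+ * slice, and line/column primitives fit together.
+ * Examples:
+ * ---
+ * auto r = LexerRange(cast(const(ubyte)[]) "abc\ndef");
+ * assert (r.front == 'a');
+ * size_t m = r.mark();      // remember the start of a token
+ * r.popFrontN(3);           // consume "abc"
+ * assert (r.slice(m) == cast(const(ubyte)[]) "abc");
+ * r.popFront();             // consume the newline itself...
+ * r.incrementLine();        // ...then reset the column and bump the line
+ * assert (r.line == 2 && r.column == 1);
+ * ---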
+ */ struct LexerRange { + /** + * Params: + * bytes = the _lexer input + * index = the initial offset from the beginning of $(D_PARAM bytes) + * column = the initial column number + * line = the initial line number + */ this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe { this.bytes = bytes; @@ -412,31 +618,52 @@ struct LexerRange this.line = line; } + /** + * Returns: a mark at the current position that can then be used with slice. + */ size_t mark() const nothrow pure @safe { return index; } + /** + * Sets the range to the given position. + * Params: m = the position to seek to + */ void seek(size_t m) nothrow pure @safe { index = m; } + /** + * Returns a slice of the input byte array between the given mark and the + * current position. + * Params: m = the beginning index of the slice to return + */ const(ubyte)[] slice(size_t m) const nothrow pure @safe { return bytes[m .. index]; } + /** + * Implements the range primitive _empty. + */ bool empty() const nothrow pure @safe { return index >= bytes.length; } + /** + * Implements the range primitive _front. + */ ubyte front() const nothrow pure @safe { return bytes[index]; } + /** + * Returns: the current item as well as the next $(D_PARAM p) items. + */ const(ubyte)[] peek(size_t p) const nothrow pure @safe { return index + p + 1 > bytes.length @@ -444,48 +671,79 @@ struct LexerRange : bytes[index .. index + p + 1]; } + /** + * Returns: the byte that is $(D_PARAM offset) bytes ahead of the current position. + */ ubyte peekAt(size_t offset) const nothrow pure @safe { return bytes[index + offset]; } + /** + * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead. + */ bool canPeek(size_t p) const nothrow pure @safe { return index + p < bytes.length; } + /** + * Implements the range primitive _popFront. + */ void popFront() pure nothrow @safe { index++; column++; } + /** + * Implements the algorithm _popFrontN more efficiently. + */ void popFrontN(size_t n) pure nothrow @safe { index += n; + column += n; } + /** + * Increments the range's line number and resets the column counter. + */ void incrementLine() pure nothrow @safe { column = 1; line++; } + /** + * The input _bytes. + */ const(ubyte)[] bytes; + + /** + * The range's current position. + */ size_t index; + + /** + * The current _column number. + */ size_t column; + + /** + * The current _line number. + */ size_t line; } /** - * The string cache should be used within lexer implementations for several - * reasons: - * $(UL - * $(LI Reducing memory consumption.) - * $(LI Increasing performance in token comparisons) - * $(LI Correctly creating immutable token text if the lexing source is not - * immutable) - * ) + * The string cache implements a map/set for strings. Placing a string in the + * cache returns an identifier that can be used to instantly access the stored + * string. It is then possible to simply compare these indexes instead of + * performing full string comparisons when comparing the string content of + * dynamic tokens. The string cache also handles its own memory, so that mutable + * ubyte[] input passed to lexers can still have immutable string fields in their tokens. + * Because the string cache also performs de-duplication, it is possible to + * drastically reduce the memory usage of a lexer. */ struct StringCache { @@ -493,7 +751,10 @@ public: @disable this(); - this(size_t bucketCount = defaultBucketCount) + /** + * Params: bucketCount = the initial number of buckets. 
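+ *
+ * The example below is a rough usage sketch showing construction together
+ * with hashStep() and cacheGet(); the zero initial hash value is an
+ * assumption, not something this module documents.
+ * Examples:
+ * ---
+ * StringCache cache = StringCache(StringCache.defaultBucketCount);
+ * ubyte[] bytes = ['a', 'b', 'c'];
+ * uint hash = 0; // assumed seed
+ * foreach (b; bytes)
+ *     hash = StringCache.hashStep(b, hash);
+ * string s = cache.cacheGet(bytes, hash);
+ * assert (s == "abc");
+ * ---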
+ */ + this(size_t bucketCount) { buckets = new Item*[bucketCount]; } @@ -512,6 +773,9 @@ public: return get(cache(bytes)); } + /** + * Equivalent to calling cache() and get(). + */ string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe { return get(cache(bytes, hash)); } @@ -536,6 +800,11 @@ public: return cache(bytes, hash); } + /** + * Caches a string as above, but uses the given hash code instead of + * calculating one itself. Using this alongside hashStep() can reduce the + * amount of work necessary when lexing dynamic tokens. + */ size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe in { @@ -583,11 +852,21 @@ public: writeln("rehashes: ", rehashCount); } + /** + * Incremental hashing. + * Params: + * b = the byte to add to the hash + * h = the hash that has been calculated so far + * Returns: the new hash code for the string. + */ static uint hashStep(ubyte b, uint h) pure nothrow @safe { return (h ^ sbox[b]) * 3; } + /** + * The default bucket count for the string cache. + */ static enum defaultBucketCount = 2048; private: diff --git a/stdx/lexer.html b/stdx/lexer.html new file mode 100644 index 0000000..8881aa0 --- /dev/null +++ b/stdx/lexer.html @@ -0,0 +1,483 @@ +

+stdx.lexer: generated DDoc HTML documentation for the stdx.lexer module (its text mirrors the documentation comments added to stdx/lexer.d above).
diff --git a/stdx/lexer.o b/stdx/lexer.o new file mode 100644 index 0000000..162acdb Binary files /dev/null and b/stdx/lexer.o differ
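For reference, a small hypothetical D snippet of the kind the two new checks are meant to report; the file name and contents below are made up for illustration and are not part of this patch:

---
// sample.d (hypothetical input to Dscanner's static analysis)
void example()
{
    int* p = new int;
    delete p;          // reported by DeleteCheck: deprecated delete keyword

    double a = 0.1, b = 0.2;
    if (a <> b) {}     // reported by FloatOperatorCheck
    if (a !<>= b) {}   // reported by FloatOperatorCheck
}
---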