Updated lexer docs. Implemented delete and fp operator rules. Fixed bug with AST traversal
This commit is contained in:
parent
2f78272fed
commit
d13d680b74
|
@ -0,0 +1,29 @@
|
|||
// Copyright Brian Schott (Sir Alaran) 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
module analysis.del;
|
||||
|
||||
import stdx.d.ast;
|
||||
import stdx.d.lexer;
|
||||
import analysis.base;
|
||||
|
||||
/**
|
||||
* Checks for use of the deprecated "delete" keyword
|
||||
*/
|
||||
class DeleteCheck : BaseAnalyzer
{
	alias visit = BaseAnalyzer.visit;

	// Diagnostic text emitted for every occurrence of the delete keyword.
	private enum string MESSAGE = "Avoid using the deprecated delete keyword";

	this(string fileName)
	{
		super(fileName);
	}

	override void visit(DeleteExpression d)
	{
		// Report the location of the delete expression itself, then keep
		// walking the subtree so nested delete expressions are found too.
		addErrorMessage(d.line, d.column, MESSAGE);
		d.accept(this);
	}
}
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright Brian Schott (Sir Alaran) 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
module analysis.fish;
|
||||
|
||||
import stdx.d.ast;
|
||||
import stdx.d.lexer;
|
||||
import analysis.base;
|
||||
|
||||
/**
|
||||
* Checks for use of the deprecated floating point comparison operators.
|
||||
*/
|
||||
class FloatOperatorCheck : BaseAnalyzer
{
	alias visit = BaseAnalyzer.visit;

	this(string fileName)
	{
		super(fileName);
	}

	override void visit(RelExpression r)
	{
		// D deprecates eight floating-point (NCEG) comparison operators:
		// <>, <>=, !<>, !<>=, !<, !<=, !> and !>=. The original list
		// omitted "<>=", so uses of that operator went unreported.
		if (r.operator == tok!"<>"
			|| r.operator == tok!"<>="
			|| r.operator == tok!"!<>"
			|| r.operator == tok!"!<>="
			|| r.operator == tok!"!<"
			|| r.operator == tok!"!<="
			|| r.operator == tok!"!>"
			|| r.operator == tok!"!>=")
		{
			addErrorMessage(r.line, r.column,
				"Avoid using the deprecated floating-point operators");
		}
		// Recurse so relational expressions nested in the operands are
		// also checked.
		r.accept(this);
	}
}
|
|
@ -15,6 +15,8 @@ import analysis.base;
|
|||
import analysis.style;
|
||||
import analysis.enumarrayliteral;
|
||||
import analysis.pokemon;
|
||||
import analysis.del;
|
||||
import analysis.fish;
|
||||
|
||||
void messageFunction(string fileName, size_t line, size_t column, string message,
|
||||
bool isError)
|
||||
|
@ -63,8 +65,14 @@ void analyze(File output, string[] fileNames, bool staticAnalyze = true)
|
|||
auto pokemon = new PokemonExceptionCheck(fileName);
|
||||
pokemon.visit(m);
|
||||
|
||||
auto del = new DeleteCheck(fileName);
|
||||
del.visit(m);
|
||||
|
||||
auto fish = new FloatOperatorCheck(fileName);
|
||||
fish.visit(m);
|
||||
|
||||
foreach (message; sort(chain(enums.messages, style.messages,
|
||||
pokemon.messages).array))
|
||||
pokemon.messages, del.messages, fish.messages).array))
|
||||
{
|
||||
writeln(message);
|
||||
}
|
||||
|
|
5
main.d
5
main.d
|
@ -109,10 +109,11 @@ int main(string[] args)
|
|||
}
|
||||
else if (tokenDump)
|
||||
{
|
||||
writeln("text blank\tindex\tline\tcolumn\tcomment");
|
||||
foreach (token; tokens)
|
||||
{
|
||||
writeln("«", token.text is null ? str(token.type) : token.text,
|
||||
"» ", token.text !is null, " ", token.index, " ", token.line, " ", token.column, " ",
|
||||
writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
|
||||
token.text !is null, token.index, token.line, token.column,
|
||||
token.comment);
|
||||
}
|
||||
return 0;
|
||||
|
|
149
stdx/d/ast.d
149
stdx/d/ast.d
|
@ -31,6 +31,58 @@ import std.string;
|
|||
abstract class ASTVisitor
|
||||
{
|
||||
public:
|
||||
|
||||
// Manual dynamic dispatch for the abstract ExpressionNode base class:
// probe each concrete expression subclass in turn and re-invoke visit()
// with the most derived static type, so subclass-specific overrides
// (e.g. visit(DeleteExpression)) are reached even when the parser stored
// the node behind an ExpressionNode reference.
// NOTE(review): the chain is ordered alphabetically; Expression is tested
// mid-chain, so any subclass of Expression appearing later in the chain
// would be shadowed by it — presumably none are; verify against the class
// hierarchy.
void visit(ExpressionNode n)
{
	if (cast(AddExpression) n) visit(cast(AddExpression) n);
	else if (cast(AndAndExpression) n) visit(cast(AndAndExpression) n);
	else if (cast(AndExpression) n) visit(cast(AndExpression) n);
	else if (cast(AsmAddExp) n) visit(cast(AsmAddExp) n);
	else if (cast(AsmAndExp) n) visit(cast(AsmAndExp) n);
	else if (cast(AsmEqualExp) n) visit(cast(AsmEqualExp) n);
	else if (cast(AsmLogAndExp) n) visit(cast(AsmLogAndExp) n);
	else if (cast(AsmLogOrExp) n) visit(cast(AsmLogOrExp) n);
	else if (cast(AsmMulExp) n) visit(cast(AsmMulExp) n);
	else if (cast(AsmOrExp) n) visit(cast(AsmOrExp) n);
	else if (cast(AsmRelExp) n) visit(cast(AsmRelExp) n);
	else if (cast(AsmShiftExp) n) visit(cast(AsmShiftExp) n);
	else if (cast(AssertExpression) n) visit(cast(AssertExpression) n);
	else if (cast(AssignExpression) n) visit(cast(AssignExpression) n);
	else if (cast(CmpExpression) n) visit(cast(CmpExpression) n);
	else if (cast(DeleteExpression) n) visit(cast(DeleteExpression) n);
	else if (cast(EqualExpression) n) visit(cast(EqualExpression) n);
	else if (cast(Expression) n) visit(cast(Expression) n);
	else if (cast(FunctionCallExpression) n) visit(cast(FunctionCallExpression) n);
	else if (cast(FunctionLiteralExpression) n) visit(cast(FunctionLiteralExpression) n);
	else if (cast(IdentityExpression) n) visit(cast(IdentityExpression) n);
	else if (cast(ImportExpression) n) visit(cast(ImportExpression) n);
	else if (cast(IndexExpression) n) visit(cast(IndexExpression) n);
	else if (cast(InExpression) n) visit(cast(InExpression) n);
	else if (cast(IsExpression) n) visit(cast(IsExpression) n);
	else if (cast(LambdaExpression) n) visit(cast(LambdaExpression) n);
	else if (cast(MixinExpression) n) visit(cast(MixinExpression) n);
	else if (cast(MulExpression) n) visit(cast(MulExpression) n);
	else if (cast(NewAnonClassExpression) n) visit(cast(NewAnonClassExpression) n);
	else if (cast(NewExpression) n) visit(cast(NewExpression) n);
	else if (cast(OrExpression) n) visit(cast(OrExpression) n);
	else if (cast(OrOrExpression) n) visit(cast(OrOrExpression) n);
	else if (cast(PostIncDecExpression) n) visit(cast(PostIncDecExpression) n);
	else if (cast(PowExpression) n) visit(cast(PowExpression) n);
	else if (cast(PragmaExpression) n) visit(cast(PragmaExpression) n);
	else if (cast(PreIncDecExpression) n) visit(cast(PreIncDecExpression) n);
	else if (cast(PrimaryExpression) n) visit(cast(PrimaryExpression) n);
	else if (cast(RelExpression) n) visit(cast(RelExpression) n);
	else if (cast(ShiftExpression) n) visit(cast(ShiftExpression) n);
	else if (cast(SliceExpression) n) visit(cast(SliceExpression) n);
	else if (cast(TemplateMixinExpression) n) visit(cast(TemplateMixinExpression) n);
	else if (cast(TernaryExpression) n) visit(cast(TernaryExpression) n);
	else if (cast(TraitsExpression) n) visit(cast(TraitsExpression) n);
	else if (cast(TypeidExpression) n) visit(cast(TypeidExpression) n);
	else if (cast(TypeofExpression) n) visit(cast(TypeofExpression) n);
	else if (cast(UnaryExpression) n) visit(cast(UnaryExpression) n);
	else if (cast(XorExpression) n) visit(cast(XorExpression) n);
	// Unknown subclasses fall through silently: no visit() call is made.
}
|
||||
|
||||
/** */ void visit(AddExpression addExpression) { addExpression.accept(this); }
|
||||
/** */ void visit(AliasDeclaration aliasDeclaration) { aliasDeclaration.accept(this); }
|
||||
/** */ void visit(AliasInitializer aliasInitializer) { aliasInitializer.accept(this); }
|
||||
|
@ -104,7 +156,6 @@ public:
|
|||
/** */ void visit(EponymousTemplateDeclaration eponymousTemplateDeclaration) { eponymousTemplateDeclaration.accept(this); }
|
||||
/** */ void visit(EqualExpression equalExpression) { equalExpression.accept(this); }
|
||||
/** */ void visit(Expression expression) { expression.accept(this); }
|
||||
/** */ void visit(ExpressionNode expressionNode) { expressionNode.accept(this); }
|
||||
/** */ void visit(ExpressionStatement expressionStatement) { expressionStatement.accept(this); }
|
||||
/** */ void visit(FinalSwitchStatement finalSwitchStatement) { finalSwitchStatement.accept(this); }
|
||||
/** */ void visit(Finally finally_) { finally_.accept(this); }
|
||||
|
@ -234,10 +285,11 @@ public:
|
|||
|
||||
interface ASTNode
{
public:
	/** Calls the correct visit() overload on the given visitor for this node
	 *  and/or forwards the visitor to this node's children. */
	void accept(ASTVisitor visitor);
}
|
||||
|
||||
/// Code string mixed into AST node classes whose accept() visits no
/// children: an empty override of ASTNode.accept.
immutable string DEFAULT_ACCEPT = q{override void accept(ASTVisitor visitor) {}};
|
||||
|
||||
template visitIfNotNull(fields ...)
|
||||
{
|
||||
|
@ -259,19 +311,28 @@ template visitIfNotNull(fields ...)
|
|||
}
|
||||
}
|
||||
|
||||
abstract class ExpressionNode : ASTNode {}
|
||||
// Common base class for all expression AST nodes.
abstract class ExpressionNode : ASTNode
{
public:
	override void accept(ASTVisitor visitor)
	{
		// Must never be reached: every concrete expression subclass
		// overrides accept(), and ASTVisitor.visit(ExpressionNode)
		// dispatches to the concrete type before accept is involved.
		assert (false);
	}
}
|
||||
|
||||
// Fields shared by every binary expression node class.
mixin template BinaryExpressionBody()
{
	// Operands; either may be null (accept() bodies guard with
	// visitIfNotNull!(left, right)).
	ExpressionNode left;
	ExpressionNode right;
	// Source location — set from the current token when the node is parsed
	// (see parseLeftAssocBinaryExpression: n.line = current.line).
	size_t line;
	size_t column;
}
|
||||
|
||||
///
|
||||
class AddExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -283,7 +344,7 @@ public:
|
|||
class AliasDeclaration : ASTNode
|
||||
{
|
||||
public:
|
||||
void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(type, name, initializers));
|
||||
}
|
||||
|
@ -332,7 +393,7 @@ public:
|
|||
class AndAndExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -343,7 +404,7 @@ public:
|
|||
class AndExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -566,7 +627,7 @@ public:
|
|||
class AssertExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(assertion, message));
|
||||
}
|
||||
|
@ -578,7 +639,7 @@ public:
|
|||
class AssignExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(ternaryExpression, assignExpression));
|
||||
}
|
||||
|
@ -816,7 +877,7 @@ public:
|
|||
class CmpExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(shiftExpression, equalExpression,
|
||||
identityExpression, relExpression, inExpression));
|
||||
|
@ -1031,11 +1092,13 @@ public:
|
|||
// AST node for a "delete <unaryExpression>" expression.
class DeleteExpression : ExpressionNode
{
public:
	override void accept(ASTVisitor visitor)
	{
		mixin (visitIfNotNull!(unaryExpression));
	}
	/** The operand being deleted. */ UnaryExpression unaryExpression;
	/** Line of the delete keyword (set by the parser from current.line). */ size_t line;
	/** Column of the delete keyword (set by the parser from current.column). */ size_t column;
}
|
||||
|
||||
///
|
||||
|
@ -1151,7 +1214,7 @@ public:
|
|||
class EqualExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1163,7 +1226,7 @@ public:
|
|||
class Expression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(items));
|
||||
}
|
||||
|
@ -1293,7 +1356,7 @@ public:
|
|||
class FunctionCallExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(unaryExpression, arguments, templateArguments));
|
||||
}
|
||||
|
@ -1306,7 +1369,7 @@ public:
|
|||
class FunctionCallStatement : ASTNode
|
||||
{
|
||||
public:
|
||||
void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(functionCallExpression));
|
||||
}
|
||||
|
@ -1338,7 +1401,7 @@ public:
|
|||
class FunctionLiteralExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(type, parameters, functionAttributes,
|
||||
functionBody));
|
||||
|
@ -1413,7 +1476,7 @@ public:
|
|||
class IdentityExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1478,7 +1541,7 @@ public:
|
|||
class ImportExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(assignExpression));
|
||||
}
|
||||
|
@ -1489,7 +1552,7 @@ public:
|
|||
class IndexExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(unaryExpression, argumentList));
|
||||
}
|
||||
|
@ -1501,7 +1564,7 @@ public:
|
|||
class InExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1575,7 +1638,7 @@ public:
|
|||
class IsExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(type, identifier, typeSpecialization,
|
||||
templateParameterList));
|
||||
|
@ -1626,7 +1689,7 @@ public:
|
|||
class LambdaExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(identifier, parameters, functionAttributes,
|
||||
assignExpression));
|
||||
|
@ -1689,7 +1752,7 @@ public:
|
|||
class MixinExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(assignExpression));
|
||||
}
|
||||
|
@ -1748,7 +1811,7 @@ public:
|
|||
class MulExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1760,7 +1823,7 @@ public:
|
|||
class NewAnonClassExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(allocatorArguments, constructorArguments,
|
||||
baseClassList, structBody));
|
||||
|
@ -1775,7 +1838,7 @@ public:
|
|||
class NewExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(newAnonClassExpression, type, arguments,
|
||||
assignExpression));
|
||||
|
@ -1863,7 +1926,7 @@ public:
|
|||
class OrExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1874,7 +1937,7 @@ public:
|
|||
class OrOrExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1937,7 +2000,7 @@ public:
|
|||
class PostIncDecExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(unaryExpression));
|
||||
}
|
||||
|
@ -1949,7 +2012,7 @@ public:
|
|||
class PowExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -1971,7 +2034,7 @@ public:
|
|||
class PragmaExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(identifier, argumentList));
|
||||
}
|
||||
|
@ -1983,7 +2046,7 @@ public:
|
|||
class PreIncDecExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(unaryExpression));
|
||||
}
|
||||
|
@ -1995,7 +2058,7 @@ public:
|
|||
class PrimaryExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(basicType, primary, typeofExpression,
|
||||
typeidExpression, arrayLiteral, assocArrayLiteral, expression,
|
||||
|
@ -2035,7 +2098,7 @@ public:
|
|||
class RelExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -2096,7 +2159,7 @@ public:
|
|||
class ShiftExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
@ -2120,7 +2183,7 @@ public:
|
|||
class SliceExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(unaryExpression, lower, upper));
|
||||
}
|
||||
|
@ -2409,7 +2472,7 @@ public:
|
|||
class TemplateMixinExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(identifier, templateArguments, mixinTemplateName));
|
||||
}
|
||||
|
@ -2534,7 +2597,7 @@ public:
|
|||
class TernaryExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(orOrExpression, expression, ternaryExpression));
|
||||
}
|
||||
|
@ -2558,7 +2621,7 @@ public:
|
|||
class TraitsExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(identifier, templateArgumentList));
|
||||
}
|
||||
|
@ -2647,7 +2710,7 @@ public:
|
|||
class TypeidExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(type, expression));
|
||||
}
|
||||
|
@ -2659,7 +2722,7 @@ public:
|
|||
class TypeofExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(expression, return_));
|
||||
}
|
||||
|
@ -2671,7 +2734,7 @@ public:
|
|||
class UnaryExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
// TODO prefix, postfix, unary
|
||||
mixin (visitIfNotNull!(primaryExpression, newExpression,
|
||||
|
@ -2803,7 +2866,7 @@ public:
|
|||
class XorExpression : ExpressionNode
|
||||
{
|
||||
public:
|
||||
/+override+/ void accept(ASTVisitor visitor)
|
||||
override void accept(ASTVisitor visitor)
|
||||
{
|
||||
mixin (visitIfNotNull!(left, right));
|
||||
}
|
||||
|
|
|
@ -1874,6 +1874,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
|
|||
{
|
||||
mixin(traceEnterAndExit!(__FUNCTION__));
|
||||
auto node = new DeleteExpression;
|
||||
node.line = current.line;
|
||||
node.column = current.column;
|
||||
if (expect(tok!"delete") is null) return null;
|
||||
node.unaryExpression = parseUnaryExpression();
|
||||
return node;
|
||||
|
@ -3990,6 +3992,7 @@ q{(int a, ...)
|
|||
*/
|
||||
PragmaDeclaration parsePragmaDeclaration()
|
||||
{
|
||||
mixin (traceEnterAndExit!(__FUNCTION__));
|
||||
auto node = new PragmaDeclaration;
|
||||
node.pragmaExpression = parsePragmaExpression();
|
||||
expect(tok!";");
|
||||
|
@ -4005,6 +4008,7 @@ q{(int a, ...)
|
|||
*/
|
||||
PragmaExpression parsePragmaExpression()
|
||||
{
|
||||
mixin (traceEnterAndExit!(__FUNCTION__));
|
||||
auto node = new PragmaExpression;
|
||||
expect(tok!"pragma");
|
||||
expect(tok!"(");
|
||||
|
@ -4264,8 +4268,9 @@ q{(int a, ...)
|
|||
* | $(LITERAL '!<=')
|
||||
* ;)
|
||||
*/
|
||||
ExpressionNode parseRelExpression(ExpressionNode shift = null)
|
||||
ExpressionNode parseRelExpression(ExpressionNode shift)
|
||||
{
|
||||
mixin (traceEnterAndExit!(__FUNCTION__));
|
||||
return parseLeftAssocBinaryExpression!(RelExpression, ShiftExpression,
|
||||
tok!"<", tok!"<=", tok!">", tok!">=", tok!"!<>=", tok!"!<>",
|
||||
tok!"<>", tok!"<>=", tok!"!>", tok!"!>=", tok!"!>=", tok!"!<",
|
||||
|
@ -6238,7 +6243,11 @@ protected:
|
|||
{
|
||||
auto n = new ExpressionType;
|
||||
static if (__traits(hasMember, ExpressionType, "operator"))
|
||||
{
|
||||
n.line = current.line;
|
||||
n.column = current.column;
|
||||
n.operator = advance().type;
|
||||
}
|
||||
else
|
||||
advance();
|
||||
n.left = node;
|
||||
|
|
355
stdx/lexer.d
355
stdx/lexer.d
|
@ -1,8 +1,99 @@
|
|||
// Written in the D programming language
|
||||
|
||||
/**
|
||||
* $(H2 Summary)
|
||||
* This module contains a range-based _lexer generator.
|
||||
*
|
||||
* $(H2 Overview)
|
||||
* The _lexer generator consists of a template mixin, $(LREF Lexer), along with
|
||||
* several helper templates for generating such things as token identifiers.
|
||||
*
|
||||
* To write a _lexer using this API:
|
||||
* $(OL
|
||||
* $(LI Create the string array constants for your language.
|
||||
* $(UL
|
||||
* $(LI $(LINK2 #.StringConstants, String Constants))
|
||||
* ))
|
||||
* $(LI Create aliases for the various token and token identifier types
|
||||
* specific to your language.
|
||||
* $(UL
|
||||
* $(LI $(LREF TokenIdType))
|
||||
* $(LI $(LREF tokenStringRepresentation))
|
||||
* $(LI $(LREF TokenStructure))
|
||||
* $(LI $(LREF TokenId))
|
||||
* ))
|
||||
* $(LI Create a struct that mixes in the Lexer template mixin and
|
||||
* implements the necessary functions.
|
||||
* $(UL
|
||||
* $(LI $(LREF Lexer))
|
||||
* ))
|
||||
* )
|
||||
* Examples:
|
||||
* $(UL
|
||||
* $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
|
||||
* $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
|
||||
* )
|
||||
* $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
|
||||
* $(DL
|
||||
* $(DT $(B staticTokens))
|
||||
* $(DD A listing of the tokens whose exact value never changes and which cannot
|
||||
* possibly be a token handled by the default token lexing function. The
|
||||
* most common example of this kind of token is an operator such as
|
||||
* $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
|
||||
* $(DT $(B dynamicTokens))
|
||||
* $(DD A listing of tokens whose value is variable, such as whitespace,
|
||||
* identifiers, number literals, and string literals.)
|
||||
* $(DT $(B possibleDefaultTokens))
|
||||
* $(DD A listing of tokens that could possibly be one of the tokens handled by
|
||||
* the default token handling function. A common example of this is
|
||||
* a keyword such as $(D_STRING "for"), which looks like the beginning of
|
||||
* the identifier $(D_STRING "fortunate"). isSeparating is called to
|
||||
* determine if the character after the $(D_STRING 'r') separates the
|
||||
* identifier, indicating that the token is $(D_STRING "for"), or if lexing
|
||||
* should be turned over to the defaultTokenFunction.)
|
||||
* $(DT $(B tokenHandlers))
|
||||
* $(DD A mapping of prefixes to custom token handling function names. The
|
||||
* generated _lexer will search for the even-index elements of this array,
|
||||
* and then call the function whose name is the element immediately after the
|
||||
* even-indexed element. This is used for lexing complex tokens whose prefix
|
||||
* is fixed.)
|
||||
* )
|
||||
*
|
||||
* Here are some example constants for a simple calculator _lexer:
|
||||
* ---
|
||||
* // There are a near infinite number of valid number literals, so numbers are
|
||||
* // dynamic tokens.
|
||||
* enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
|
||||
*
|
||||
* // The operators are always the same, and cannot start a numberLiteral, so
|
||||
* // they are staticTokens
|
||||
* enum string[] staticTokens = ["-", "+", "*", "/"];
|
||||
*
|
||||
* // In this simple example there are no keywords or other tokens that could
|
||||
* // look like dynamic tokens, so this is blank.
|
||||
* enum string[] possibleDefaultTokens = [];
|
||||
*
|
||||
* // If any whitespace character or digit is encountered, pass lexing over to
|
||||
* // our custom handler functions. These will be demonstrated in an example
|
||||
* // later on.
|
||||
* enum string[] tokenHandlers = [
|
||||
* "0", "lexNumber",
|
||||
* "1", "lexNumber",
|
||||
* "2", "lexNumber",
|
||||
* "3", "lexNumber",
|
||||
* "4", "lexNumber",
|
||||
* "5", "lexNumber",
|
||||
* "6", "lexNumber",
|
||||
* "7", "lexNumber",
|
||||
* "8", "lexNumber",
|
||||
* "9", "lexNumber",
|
||||
* " ", "lexWhitespace",
|
||||
* "\n", "lexWhitespace",
|
||||
* "\t", "lexWhitespace",
|
||||
* "\r", "lexWhitespace"
|
||||
* ];
|
||||
* ---
|
||||
*
|
||||
* Copyright: Brian Schott 2013
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
||||
* Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
|
||||
|
@ -16,7 +107,12 @@ module stdx.lexer;
|
|||
* unsigned integral type that is able to hold the value
|
||||
* staticTokens.length + dynamicTokens.length. For example if there are 20
|
||||
* static tokens, 30 dynamic tokens, and 10 possible default tokens, this
|
||||
* template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.
|
||||
* template will alias itself to ubyte, as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
|
||||
* Examples:
|
||||
* ---
|
||||
* // In our calculator example this means that IdType is an alias for ubyte.
|
||||
* alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
* ---
|
||||
*/
|
||||
template TokenIdType(alias staticTokens, alias dynamicTokens,
|
||||
alias possibleDefaultTokens)
|
||||
|
@ -32,7 +128,15 @@ template TokenIdType(alias staticTokens, alias dynamicTokens,
|
|||
}
|
||||
|
||||
/**
|
||||
* Looks up the string representation of the given token type.
|
||||
* Looks up the string representation of the given token type. This is the
|
||||
* opposite of the function of the TokenId template.
|
||||
* Params: type = the token type identifier
|
||||
* Examples:
|
||||
* ---
|
||||
* alias str = tokenStringRepresentation(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
* assert (str(tok!"*") == "*");
|
||||
* ---
|
||||
* See_also: $(LREF TokenId)
|
||||
*/
|
||||
string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property
|
||||
{
|
||||
|
@ -57,18 +161,18 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
|
|||
* valid token type identifier)
|
||||
* )
|
||||
* In all cases this template will alias itself to a constant of type IdType.
|
||||
* This template will fail at compile time if $(D_PARAM symbol) is not one of
|
||||
* the staticTokens, dynamicTokens, or possibleDefaultTokens.
|
||||
* Examples:
|
||||
* ---
|
||||
* enum string[] staticTokens = ["+", "-", "*", "/"];
|
||||
* enum string[] dynamicTokens = ["number"];
|
||||
* enum string[] possibleDefaultTokens = [];
|
||||
* alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
* template tok(string symbol)
|
||||
* {
|
||||
* alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
|
||||
* possibleDefaultTokens, symbol);
|
||||
* }
|
||||
* // num and plus are of type ubyte.
|
||||
* IdType plus = tok!"+";
|
||||
* IdType num = tok!"numberLiteral";
|
||||
* ---
|
||||
*/
|
||||
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
|
||||
|
@ -118,35 +222,49 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens,
|
|||
/**
|
||||
* The token that is returned by the lexer.
|
||||
* Params:
|
||||
* IDType = The D type of the "type" token type field.
|
||||
* IdType = The D type of the "type" token type field.
|
||||
* extraFields = A string containing D code for any extra fields that should
|
||||
* be included in the token structure body. This string is passed
|
||||
* directly to a mixin statement.
|
||||
* Examples:
|
||||
* ---
|
||||
* // No extra struct fields are desired in this example, so leave it blank.
|
||||
* alias Token = TokenStructure!(IdType, "");
|
||||
* Token minusToken = Token(tok!"-");
|
||||
* ---
|
||||
*/
|
||||
struct TokenStructure(IDType, string extraFields = "")
|
||||
struct TokenStructure(IdType, string extraFields = "")
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* == overload for the the token type.
|
||||
*/
|
||||
bool opEquals(IDType type) const pure nothrow @safe
|
||||
bool opEquals(IdType type) const pure nothrow @safe
|
||||
{
|
||||
return this.type == type;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Constructs a token from a token type.
|
||||
* Params: type = the token type
|
||||
*/
|
||||
this(IDType type)
|
||||
this(IdType type)
|
||||
{
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Constructs a token.
|
||||
* Params:
|
||||
* type = the token type
|
||||
* text = the text of the token, which may be null
|
||||
* line = the line number at which this token occurs
|
||||
* column = the column number at which this token occurs
|
||||
* index = the byte offset from the beginning of the input at which this
|
||||
* token occurs
|
||||
*/
|
||||
this(IDType type, string text, size_t line, size_t column, size_t index)
|
||||
this(IdType type, string text, size_t line, size_t column, size_t index)
|
||||
{
|
||||
this.text = text;
|
||||
this.line = line;
|
||||
|
@ -156,39 +274,105 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* The _text of the token.
|
||||
*/
|
||||
string text;
|
||||
|
||||
/**
|
||||
*
|
||||
* The line number at which this token occurs.
|
||||
*/
|
||||
size_t line;
|
||||
|
||||
/**
|
||||
*
|
||||
* The Column nmuber at which this token occurs.
|
||||
*/
|
||||
size_t column;
|
||||
|
||||
/**
|
||||
*
|
||||
* The byte offset from the beginning of the input at which this token
|
||||
* occurs.
|
||||
*/
|
||||
size_t index;
|
||||
|
||||
/**
|
||||
*
|
||||
* The token type.
|
||||
*/
|
||||
IDType type;
|
||||
IdType type;
|
||||
|
||||
mixin (extraFields);
|
||||
}
|
||||
|
||||
/**
|
||||
* The implementation of the _lexer is contained within this mixin template.
|
||||
* To use it, this template should be mixed in to a struct that represents the
|
||||
* _lexer for your language. This struct should implement the following methods:
|
||||
* $(UL
|
||||
* $(LI popFront, which should call this mixin's _popFront() and
|
||||
* additionally perform any token filtering or shuffling you deem
|
||||
* necessary. For example, you can implement popFront to skip comment or
|
||||
* tokens.)
|
||||
* $(LI A function that serves as the default token lexing function. For
|
||||
* most languages this will be the identifier lexing function.)
|
||||
* $(LI A function that is able to determine if an identifier/keyword has
|
||||
* come to an end. This function must retorn $(D_KEYWORD bool) and take
|
||||
* a single $(D_KEYWORD size_t) argument representing the number of
|
||||
* bytes to skip over before looking for a separating character.)
|
||||
* $(LI Any functions referred to in the tokenHandlers template paramater.
|
||||
* These functions must be marked $(D_KEYWORD pure nothrow), take no
|
||||
* arguments, and return a token)
|
||||
* $(LI A constructor that initializes the range field as well as calls
|
||||
* popFront() exactly once (to initialize the _front field).)
|
||||
* )
|
||||
* Examples:
|
||||
* ---
|
||||
* struct CalculatorLexer
|
||||
* {
|
||||
* mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
|
||||
* staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
|
||||
*
|
||||
* this (ubyte[] bytes)
|
||||
* {
|
||||
* this.range = LexerRange(bytes);
|
||||
* popFront();
|
||||
* }
|
||||
*
|
||||
* void popFront() pure
|
||||
* {
|
||||
* _popFront();
|
||||
* }
|
||||
*
|
||||
* Token lexNumber() pure nothrow @safe
|
||||
* {
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* Token lexWhitespace() pure nothrow @safe
|
||||
* {
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* Token defaultTokenFunction() pure nothrow @safe
|
||||
* {
|
||||
* // There is no default token in the example calculator language, so
|
||||
* // this is always an error.
|
||||
* range.popFront();
|
||||
* return Token(tok!"");
|
||||
* }
|
||||
*
|
||||
* bool isSeparating(size_t offset) pure nothrow @safe
|
||||
* {
|
||||
* // For this example language, always return true.
|
||||
* return true;
|
||||
* }
|
||||
* }
|
||||
* ---
|
||||
*/
|
||||
mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||
alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
|
||||
alias pseudoTokenHandlers, alias possibleDefaultTokens)
|
||||
alias tokenHandlers, alias possibleDefaultTokens)
|
||||
{
|
||||
|
||||
static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must"
|
||||
static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
|
||||
~ " have a corresponding handler function name.");
|
||||
|
||||
static string generateMask(const ubyte[] arr)
|
||||
|
@ -214,7 +398,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
import std.string;
|
||||
import std.range;
|
||||
|
||||
string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2));
|
||||
string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2));
|
||||
string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
|
||||
string code;
|
||||
for (size_t i = 0; i < allTokens.length; i++)
|
||||
|
@ -240,7 +424,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
if (pseudoTokens.countUntil(tokens[0]) >= 0)
|
||||
{
|
||||
return " return "
|
||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
else if (staticTokens.countUntil(tokens[0]) >= 0)
|
||||
|
@ -251,7 +435,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
else if (pseudoTokens.countUntil(tokens[0]) >= 0)
|
||||
{
|
||||
return " return "
|
||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
}
|
||||
|
@ -271,14 +455,14 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
if (token.length <= 8)
|
||||
{
|
||||
code ~= " return "
|
||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
|
||||
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
code ~= " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n";
|
||||
code ~= " return "
|
||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
|
||||
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
}
|
||||
|
@ -325,16 +509,23 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
return code;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the range primitive front().
|
||||
*/
|
||||
ref const(Token) front() pure nothrow const @property
|
||||
{
|
||||
return _front;
|
||||
}
|
||||
|
||||
|
||||
void _popFront() pure
|
||||
{
|
||||
_front = advance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the range primitive empty().
|
||||
*/
|
||||
bool empty() pure const nothrow @property
|
||||
{
|
||||
return _front.type == tok!"\0";
|
||||
|
@ -359,9 +550,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
return retVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* This only exists because the real array() can't be called at compile-time
|
||||
*/
|
||||
// This only exists because the real array() can't be called at compile-time
|
||||
static string[] stupidToArray(R)(R range)
|
||||
{
|
||||
string[] retVal;
|
||||
|
@ -397,13 +586,30 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The lexer input.
|
||||
*/
|
||||
LexerRange range;
|
||||
|
||||
/**
|
||||
* The token that is currently at the front of the range.
|
||||
*/
|
||||
Token _front;
|
||||
}
|
||||
|
||||
/**
|
||||
* Range structure that wraps the _lexer's input.
|
||||
*/
|
||||
struct LexerRange
|
||||
{
|
||||
|
||||
/**
|
||||
* Params:
|
||||
* bytes = the _lexer input
|
||||
* index = the initial offset from the beginning of $(D_PARAM bytes)
|
||||
* column = the initial column number
|
||||
* line = the initial line number
|
||||
*/
|
||||
this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
|
||||
{
|
||||
this.bytes = bytes;
|
||||
|
@ -412,31 +618,52 @@ struct LexerRange
|
|||
this.line = line;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns: a mark at the current position that can then be used with slice.
|
||||
*/
|
||||
size_t mark() const nothrow pure @safe
|
||||
{
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the range to the given position
|
||||
* Params: m = the position to seek to
|
||||
*/
|
||||
void seek(size_t m) nothrow pure @safe
|
||||
{
|
||||
index = m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returs a slice of the input byte array betwene the given mark and the
|
||||
* current position.
|
||||
* Params m = the beginning index of the slice to return
|
||||
*/
|
||||
const(ubyte)[] slice(size_t m) const nothrow pure @safe
|
||||
{
|
||||
return bytes[m .. index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the range primitive _empty.
|
||||
*/
|
||||
bool empty() const nothrow pure @safe
|
||||
{
|
||||
return index >= bytes.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the range primitive _front.
|
||||
*/
|
||||
ubyte front() const nothrow pure @safe
|
||||
{
|
||||
return bytes[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns: the current item as well as the items $(D_PARAM p) items ahead.
|
||||
*/
|
||||
const(ubyte)[] peek(size_t p) const nothrow pure @safe
|
||||
{
|
||||
return index + p + 1 > bytes.length
|
||||
|
@ -444,48 +671,79 @@ struct LexerRange
|
|||
: bytes[index .. index + p + 1];
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
ubyte peekAt(size_t offset) const nothrow pure @safe
|
||||
{
|
||||
return bytes[index + offset];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns: true if it is possible to peek $(D_PARAM p) bytes ahead.
|
||||
*/
|
||||
bool canPeek(size_t p) const nothrow pure @safe
|
||||
{
|
||||
return index + p < bytes.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the range primitive _popFront.
|
||||
*/
|
||||
void popFront() pure nothrow @safe
|
||||
{
|
||||
index++;
|
||||
column++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements the algorithm _popFrontN more efficiently.
|
||||
*/
|
||||
void popFrontN(size_t n) pure nothrow @safe
|
||||
{
|
||||
index += n;
|
||||
column += n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the range's line number and resets the column counter.
|
||||
*/
|
||||
void incrementLine() pure nothrow @safe
|
||||
{
|
||||
column = 1;
|
||||
line++;
|
||||
}
|
||||
|
||||
/**
|
||||
* The input _bytes.
|
||||
*/
|
||||
const(ubyte)[] bytes;
|
||||
|
||||
/**
|
||||
* The range's current position.
|
||||
*/
|
||||
size_t index;
|
||||
|
||||
/**
|
||||
* The current _column number.
|
||||
*/
|
||||
size_t column;
|
||||
|
||||
/**
|
||||
* The current _line number.
|
||||
*/
|
||||
size_t line;
|
||||
}
|
||||
|
||||
/**
|
||||
* The string cache should be used within lexer implementations for several
|
||||
* reasons:
|
||||
* $(UL
|
||||
* $(LI Reducing memory consumption.)
|
||||
* $(LI Increasing performance in token comparisons)
|
||||
* $(LI Correctly creating immutable token text if the lexing source is not
|
||||
* immutable)
|
||||
* )
|
||||
* The string cache implements a map/set for strings. Placing a string in the
|
||||
* cache returns an identifier that can be used to instantly access the stored
|
||||
* string. It is then possible to simply compare these indexes instead of
|
||||
* performing full string comparisons when comparing the string content of
|
||||
* dynamic tokens. The string cache also handles its own memory, so that mutable
|
||||
* ubyte[] to lexers can still have immutable string fields in their tokens.
|
||||
* Because the string cache also performs de-duplication it is possible to
|
||||
* drastically reduce the memory usage of a lexer.
|
||||
*/
|
||||
struct StringCache
|
||||
{
|
||||
|
@ -493,7 +751,10 @@ public:
|
|||
|
||||
@disable this();
|
||||
|
||||
this(size_t bucketCount = defaultBucketCount)
|
||||
/**
|
||||
* Params: bucketCount = the initial number of buckets.
|
||||
*/
|
||||
this(size_t bucketCount)
|
||||
{
|
||||
buckets = new Item*[bucketCount];
|
||||
}
|
||||
|
@ -512,6 +773,9 @@ public:
|
|||
return get(cache(bytes));
|
||||
}
|
||||
|
||||
/**
|
||||
* Equivalent to calling cache() and get().
|
||||
*/
|
||||
string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe
|
||||
{
|
||||
return get(cache(bytes, hash));
|
||||
|
@ -536,6 +800,11 @@ public:
|
|||
return cache(bytes, hash);
|
||||
}
|
||||
|
||||
/**
|
||||
* Caches a string as above, but uses the given has code instead of
|
||||
* calculating one itself. Use this alongside hashStep() can reduce the
|
||||
* amount of work necessary when lexing dynamic tokens.
|
||||
*/
|
||||
size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
|
||||
in
|
||||
{
|
||||
|
@ -583,11 +852,21 @@ public:
|
|||
writeln("rehashes: ", rehashCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Incremental hashing.
|
||||
* Params:
|
||||
* b = the byte to add to the hash
|
||||
* h = the hash that has been calculated so far
|
||||
* Returns: the new hash code for the string.
|
||||
*/
|
||||
static uint hashStep(ubyte b, uint h) pure nothrow @safe
|
||||
{
|
||||
return (h ^ sbox[b]) * 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* The default bucket count for the string cache.
|
||||
*/
|
||||
static enum defaultBucketCount = 2048;
|
||||
|
||||
private:
|
||||
|
|
|
@ -0,0 +1,483 @@
|
|||
<h1>stdx.lexer</h1> <!-- Generated by Ddoc from lexer.d -->
|
||||
This module contains a range-based lexer generator.
|
||||
<p></p>
|
||||
The lexer generator consists of a template mixin, Lexer, along with several
|
||||
helper templates for generating such things as token identifiers.
|
||||
<p></p>
|
||||
|
||||
To generate a lexer using this API, several constants must be supplied:
|
||||
<dl><dt>staticTokens</dt>
|
||||
<dd>A listing of the tokens whose exact value never changes and which cannot
|
||||
possibly be a token handled by the default token lexing function. The
|
||||
most common example of this kind of token is an operator such as "*", or
|
||||
"-" in a programming language.</dd>
|
||||
<dt>dynamicTokens</dt>
|
||||
<dd>A listing of tokens whose value is variable, such as whitespace,
|
||||
identifiers, number literals, and string literals.</dd>
|
||||
<dt>possibleDefaultTokens</dt>
|
||||
<dd>A listing of tokens that could posibly be one of the tokens handled by
|
||||
the default token handling function. An common example of this is
|
||||
a keyword such as <span class="d_string">"for"</span>, which looks like the beginning of
|
||||
the identifier <span class="d_string">"fortunate"</span>. isSeparating is called to
|
||||
determine if the character after the <span class="d_string">'r'</span> separates the
|
||||
identifier, indicating that the token is <span class="d_string">"for"</span>, or if lexing
|
||||
should be turned over to the defaultTokenFunction.</dd>
|
||||
<dt>tokenHandlers</dt>
|
||||
<dd>A mapping of prefixes to custom token handling function names. The
|
||||
generated lexer will search for the even-index elements of this array,
|
||||
and then call the function whose name is the element immedately after the
|
||||
even-indexed element. This is used for lexing complex tokens whose prefix
|
||||
is fixed.</dd>
|
||||
</dl>
|
||||
<p></p>
|
||||
|
||||
Here are some example constants for a simple calculator lexer:
|
||||
<pre class="d_code"><span class="d_comment">// There are a near infinite number of valid number literals, so numbers are
|
||||
</span><span class="d_comment">// dynamic tokens.
|
||||
</span><span class="d_keyword">enum</span> string[] dynamicTokens = [<span class="d_string">"numberLiteral"</span>, <span class="d_string">"whitespace"</span>];
|
||||
|
||||
<span class="d_comment">// The operators are always the same, and cannot start a numberLiteral, so
|
||||
</span><span class="d_comment">// they are staticTokens
|
||||
</span><span class="d_keyword">enum</span> string[] staticTokens = [<span class="d_string">"-"</span>, <span class="d_string">"+"</span>, <span class="d_string">"*"</span>, <span class="d_string">"/"</span>];
|
||||
|
||||
<span class="d_comment">// In this simple example there are no keywords or other tokens that could
|
||||
</span><span class="d_comment">// look like dynamic tokens, so this is blank.
|
||||
</span><span class="d_keyword">enum</span> string[] possibleDefaultTokens = [];
|
||||
|
||||
<span class="d_comment">// If any whitespace character or digit is encountered, pass lexing over to
|
||||
</span><span class="d_comment">// our custom handler functions. These will be demonstrated in an example
|
||||
</span><span class="d_comment">// later on.
|
||||
</span><span class="d_keyword">enum</span> string[] tokenHandlers = [
|
||||
<span class="d_string">"0"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"1"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"2"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"3"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"4"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"5"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"6"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"7"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"8"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">"9"</span>, <span class="d_string">"lexNumber"</span>,
|
||||
<span class="d_string">" "</span>, <span class="d_string">"lexWhitespace"</span>,
|
||||
<span class="d_string">"\n"</span>, <span class="d_string">"lexWhitespace"</span>,
|
||||
<span class="d_string">"\t"</span>, <span class="d_string">"lexWhitespace"</span>,
|
||||
<span class="d_string">"\r"</span>, <span class="d_string">"lexWhitespace"</span>
|
||||
];
|
||||
</pre>
|
||||
|
||||
<p></p>
|
||||
<b>Examples:</b><br><ul><li>A lexer for D is available <a href="https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d">here</a>.</li>
|
||||
<li>A lexer for Lua is available <a href="https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d">here</a>.</li>
|
||||
</ul>
|
||||
<p></p>
|
||||
<b>License:</b><br><a href="http://www.boost.org/LICENSE_1_0.txt Boost">License 1.0</a>
|
||||
<p></p>
|
||||
<b>Authors:</b><br>Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
|
||||
<p></p>
|
||||
<b>Source:</b><br>
|
||||
<a href="https://github.com/D-Programming-Language/phobos/blob/master/std/lexer.d">std/lexer.d</a><p></p>
|
||||
|
||||
<dl><dt class="d_decl"><a name=".TokenIdType"></a>template <a name="TokenIdType"></a><span class="ddoc_psymbol">TokenIdType</span>(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)</dt>
|
||||
<dd>Template for determining the type used for a token type. Selects the smallest
|
||||
unsigned integral type that is able to hold the value
|
||||
staticTokens.length + dynamicTokens.length. For example if there are 20
|
||||
static tokens, 30 dynamic tokens, and 10 possible default tokens, this
|
||||
template will alias itself to ubyte, as 20 + 30 + 10 < <span class="d_keyword">ubyte</span>.max.
|
||||
<p></p>
|
||||
<b>Examples:</b><br><pre class="d_code"><span class="d_comment">// In our calculator example this means that IdType is an alias for ubyte.
|
||||
</span><span class="d_keyword">alias</span> IdType = <span class="d_psymbol">TokenIdType</span>!(staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".tokenStringRepresentation"></a>@property string <a name="tokenStringRepresentation"></a><span class="ddoc_psymbol">tokenStringRepresentation</span>(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType <i>type</i>);
|
||||
</dt>
|
||||
<dd>Looks up the string representation of the given token type. This is the
|
||||
opposite of the function of the TokenId template.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType type</td>
|
||||
<td valign=top>the token type identifier</td></tr>
|
||||
</table><p></p>
|
||||
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">alias</span> str = <span class="d_psymbol">tokenStringRepresentation</span>(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
<span class="d_keyword">assert</span> (str(tok!<span class="d_string">"*"</span>) == <span class="d_string">"*"</span>);
|
||||
</pre>
|
||||
<p></p>
|
||||
<b>See Also:</b><br>TokenId<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".TokenId"></a>template <a name="TokenId"></a><span class="ddoc_psymbol">TokenId</span>(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens, string symbol)</dt>
|
||||
<dd>Generates the token type identifier for the given symbol. There are two
|
||||
special cases:
|
||||
<ul> <li>If symbol is "", then the token identifier will be 0</li>
|
||||
<li>If symbol is "\0", then the token identifier will be the maximum
|
||||
valid token type identifier</li>
|
||||
</ul>
|
||||
In all cases this template will alias itself to a constant of type IdType.
|
||||
This template will fail at compile time if <span class="d_param">symbol</span> is not one of
|
||||
the staticTokens, dynamicTokens, or possibleDefaultTokens.
|
||||
<p></p>
|
||||
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">template</span> tok(string symbol)
|
||||
{
|
||||
<span class="d_keyword">alias</span> tok = <span class="d_psymbol">TokenId</span>!(IdType, staticTokens, dynamicTokens,
|
||||
possibleDefaultTokens, symbol);
|
||||
}
|
||||
<span class="d_comment">// num and plus are of type ubyte.
|
||||
</span>IdType plus = tok!<span class="d_string">"+"</span>;
|
||||
IdType num = tok!<span class="d_string">"numberLiteral"</span>;
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".TokenStructure"></a>struct <a name="TokenStructure"></a><span class="ddoc_psymbol">TokenStructure</span>(IdType, string extraFields = "");
|
||||
</dt>
|
||||
<dd>The token that is returned by the lexer.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType</td>
|
||||
<td valign=top>The D type of the "type" token type field.</td></tr>
|
||||
<tr><td valign=top>extraFields</td>
|
||||
<td valign=top>A string containing D code for any extra fields that should
|
||||
be included in the token structure body. This string is passed
|
||||
directly to a mixin statement.</td></tr>
|
||||
</table><p></p>
|
||||
<b>Examples:</b><br><pre class="d_code"><span class="d_comment">// No extra struct fields are desired in this example, so leave it blank.
|
||||
</span><span class="d_keyword">alias</span> Token = <span class="d_psymbol">TokenStructure</span>!(IdType, <span class="d_string">""</span>);
|
||||
Token minusToken = Token(tok!<span class="d_string">"-"</span>);
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
<dl><dt class="d_decl"><a name=".opEquals"></a>const pure nothrow @safe bool <a name="opEquals"></a><span class="ddoc_psymbol">opEquals</span>(IdType <i>type</i>);
|
||||
</dt>
|
||||
<dd>== overload for the the token <i>type</i>.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".this"></a> this(IdType <i>type</i>);
|
||||
</dt>
|
||||
<dd>Constructs a token from a token <i>type</i>.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType <i>type</i></td>
|
||||
<td valign=top>the token <i>type</i></td></tr>
|
||||
</table><p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".this"></a> this(IdType <i>type</i>, string <i>text</i>, size_t <i>line</i>, size_t <i>column</i>, size_t <i>index</i>);
|
||||
</dt>
|
||||
<dd>Constructs a token.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>IdType <i>type</i></td>
|
||||
<td valign=top>the token <i>type</i></td></tr>
|
||||
<tr><td valign=top>string <i>text</i></td>
|
||||
<td valign=top>the <i>text</i> of the token, which may be <b>null</b></td></tr>
|
||||
<tr><td valign=top>size_t <i>line</i></td>
|
||||
<td valign=top>the <i>line</i> number at which this token occurs</td></tr>
|
||||
<tr><td valign=top>size_t <i>column</i></td>
|
||||
<td valign=top>the <i>column</i> nmuber at which this token occurs</td></tr>
|
||||
<tr><td valign=top>size_t <i>index</i></td>
|
||||
<td valign=top>the byte offset from the beginning of the input at which this
|
||||
token occurs</td></tr>
|
||||
</table><p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".text"></a>string <a name="text"></a><span class="ddoc_psymbol">text</span>;
|
||||
</dt>
|
||||
<dd>The <a name="text"></a><span class="ddoc_psymbol">text</span> of the token.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".line"></a>size_t <a name="line"></a><span class="ddoc_psymbol">line</span>;
|
||||
</dt>
|
||||
<dd>The <a name="line"></a><span class="ddoc_psymbol">line</span> number at which this token occurs.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".column"></a>size_t <a name="column"></a><span class="ddoc_psymbol">column</span>;
|
||||
</dt>
|
||||
<dd>The Column nmuber at which this token occurs.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".index"></a>size_t <a name="index"></a><span class="ddoc_psymbol">index</span>;
|
||||
</dt>
|
||||
<dd>The byte offset from the beginning of the input at which this token
|
||||
occurs.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".type"></a>IdType <a name="type"></a><span class="ddoc_psymbol">type</span>;
|
||||
</dt>
|
||||
<dd>The token <a name="type"></a><span class="ddoc_psymbol">type</span>.<p></p>
|
||||
|
||||
</dd>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".Lexer"></a>template <a name="Lexer"></a><span class="ddoc_psymbol">Lexer</span>(IDType, Token, alias defaultTokenFunction, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, alias tokenHandlers, alias possibleDefaultTokens)</dt>
|
||||
<dd>The implementation of the lexer is contained within this mixin template.
|
||||
To use it, this template should be mixed in to a struct that represents the
|
||||
lexer for your language. This struct should implement the following methods:
|
||||
<ul> <li>popFront, which should call this mixin's popFront() and
|
||||
additionally perform any token filtering or shuffling you deem
|
||||
necessary. For example, you can implement popFront to skip comment or
|
||||
tokens.</li>
|
||||
<li>A function that serves as the default token lexing function. For
|
||||
most languages this will be the identifier lexing function.</li>
|
||||
<li>A function that is able to determine if an identifier/keyword has
|
||||
come to an end. This function must retorn <span class="d_keyword">bool</span> and take
|
||||
a single <span class="d_keyword">size_t</span> argument representing the number of
|
||||
bytes to skip over before looking for a separating character.</li>
|
||||
<li>Any functions referred to in the tokenHandlers template paramater.
|
||||
These functions must be marked <span class="d_keyword">pure nothrow</span>, take no
|
||||
arguments, and return a token</li>
|
||||
<li>A constructor that initializes the range field as well as calls
|
||||
popFront() exactly once (to initialize the front field).</li>
|
||||
</ul>
|
||||
<p></p>
|
||||
<b>Examples:</b><br><pre class="d_code"><span class="d_keyword">struct</span> CalculatorLexer
|
||||
{
|
||||
<span class="d_keyword">mixin</span> <span class="d_psymbol">Lexer</span>!(IdType, Token, defaultTokenFunction, isSeparating,
|
||||
staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
|
||||
|
||||
<span class="d_keyword">this</span> (<span class="d_keyword">ubyte</span>[] bytes)
|
||||
{
|
||||
<span class="d_keyword">this</span>.range = LexerRange(bytes);
|
||||
popFront();
|
||||
}
|
||||
|
||||
<span class="d_keyword">void</span> popFront() <span class="d_keyword">pure</span>
|
||||
{
|
||||
_popFront();
|
||||
}
|
||||
|
||||
Token lexNumber() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
|
||||
{
|
||||
...
|
||||
}
|
||||
|
||||
Token lexWhitespace() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
|
||||
{
|
||||
...
|
||||
}
|
||||
|
||||
Token defaultTokenFunction() <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
|
||||
{
|
||||
<span class="d_comment">// There is no default token in the example calculator language, so
|
||||
</span> <span class="d_comment">// this is always an error.
|
||||
</span> range.popFront();
|
||||
<span class="d_keyword">return</span> Token(tok!<span class="d_string">""</span>);
|
||||
}
|
||||
|
||||
<span class="d_keyword">bool</span> isSeparating(size_t offset) <span class="d_keyword">pure</span> <span class="d_keyword">nothrow</span> @safe
|
||||
{
|
||||
<span class="d_comment">// For this example language, always return true.
|
||||
</span> <span class="d_keyword">return</span> <span class="d_keyword">true</span>;
|
||||
}
|
||||
}
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
<dl><dt class="d_decl"><a name=".front"></a>const pure nothrow @property const(Token) <a name="front"></a><span class="ddoc_psymbol">front</span>();
|
||||
</dt>
|
||||
<dd>Implements the range primitive <a name="front"></a><span class="ddoc_psymbol">front</span>().<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".empty"></a>const pure nothrow @property bool <a name="empty"></a><span class="ddoc_psymbol">empty</span>();
|
||||
</dt>
|
||||
<dd>Implements the range primitive <a name="empty"></a><span class="ddoc_psymbol">empty</span>().<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".range"></a>LexerRange <a name="range"></a><span class="ddoc_psymbol">range</span>;
|
||||
</dt>
|
||||
<dd>The lexer input.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name="._front"></a>Token <a name="_front"></a><span class="ddoc_psymbol">_front</span>;
|
||||
</dt>
|
||||
<dd>The token that is currently at the front of the range.<p></p>
|
||||
|
||||
</dd>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange"></a>struct <a name="LexerRange"></a><span class="ddoc_psymbol">LexerRange</span>;
|
||||
</dt>
|
||||
<dd>Range structure that wraps the lexer's input.<p></p>
|
||||
|
||||
<dl><dt class="d_decl"><a name=".LexerRange.this"></a>pure nothrow @safe this(const(ubyte)[] <i>bytes</i>, size_t <i>index</i> = 0, size_t <i>column</i> = 1, size_t <i>line</i> = 1);
|
||||
</dt>
|
||||
<dd><b>Parameters:</b><table class=parms><tr><td valign=top>const(ubyte)[] <i>bytes</i></td>
|
||||
<td valign=top>the lexer input</td></tr>
|
||||
<tr><td valign=top>size_t <i>index</i></td>
|
||||
<td valign=top>the initial offset from the beginning of <span class="d_param"><i>bytes</i></span></td></tr>
|
||||
<tr><td valign=top>size_t <i>column</i></td>
|
||||
<td valign=top>the initial <i>column</i> number</td></tr>
|
||||
<tr><td valign=top>size_t <i>line</i></td>
|
||||
<td valign=top>the initial <i>line</i> number</td></tr>
|
||||
</table><p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.mark"></a>const pure nothrow @safe size_t <a name="mark"></a><span class="ddoc_psymbol">mark</span>();
|
||||
</dt>
|
||||
<dd><b>Returns:</b><br>a <a name="mark"></a><span class="ddoc_psymbol">mark</span> at the current position that can then be used with slice.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.seek"></a>pure nothrow @safe void <a name="seek"></a><span class="ddoc_psymbol">seek</span>(size_t <i>m</i>);
|
||||
</dt>
|
||||
<dd>Sets the range to the given position
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>m</i></td>
|
||||
<td valign=top>the position to <a name="seek"></a><span class="ddoc_psymbol">seek</span> to</td></tr>
|
||||
</table><p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.slice"></a>const pure nothrow @safe const(ubyte)[] <a name="slice"></a><span class="ddoc_psymbol">slice</span>(size_t <i>m</i>);
|
||||
</dt>
|
||||
<dd>Returs a <a name="slice"></a><span class="ddoc_psymbol">slice</span> of the input byte array betwene the given mark and the
|
||||
current position.
|
||||
Params <i>m</i> = the beginning index of the <a name="slice"></a><span class="ddoc_psymbol">slice</span> to return<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.empty"></a>const pure nothrow @safe bool <a name="empty"></a><span class="ddoc_psymbol">empty</span>();
|
||||
</dt>
|
||||
<dd>Implements the range primitive empty.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.front"></a>const pure nothrow @safe ubyte <a name="front"></a><span class="ddoc_psymbol">front</span>();
|
||||
</dt>
|
||||
<dd>Implements the range primitive front.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.peek"></a>const pure nothrow @safe const(ubyte)[] <a name="peek"></a><span class="ddoc_psymbol">peek</span>(size_t <i>p</i>);
|
||||
</dt>
|
||||
<dd><b>Returns:</b><br>the current item as well as the items <span class="d_param"><i>p</i></span> items ahead.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.peekAt"></a>const pure nothrow @safe ubyte <a name="peekAt"></a><span class="ddoc_psymbol">peekAt</span>(size_t <i>offset</i>);
|
||||
</dt>
|
||||
<dd><p></p>
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.canPeek"></a>const pure nothrow @safe bool <a name="canPeek"></a><span class="ddoc_psymbol">canPeek</span>(size_t <i>p</i>);
|
||||
</dt>
|
||||
<dd><b>Returns:</b><br><b>true</b> if it is possible to peek <span class="d_param"><i>p</i></span> bytes ahead.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.popFront"></a>pure nothrow @safe void <a name="popFront"></a><span class="ddoc_psymbol">popFront</span>();
|
||||
</dt>
|
||||
<dd>Implements the range primitive popFront.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.popFrontN"></a>pure nothrow @safe void <a name="popFrontN"></a><span class="ddoc_psymbol">popFrontN</span>(size_t <i>n</i>);
|
||||
</dt>
|
||||
<dd>Implements the algorithm popFrontN more efficiently.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.incrementLine"></a>pure nothrow @safe void <a name="incrementLine"></a><span class="ddoc_psymbol">incrementLine</span>();
|
||||
</dt>
|
||||
<dd>Increments the range's line number and resets the column counter.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.bytes"></a>const(ubyte)[] <a name="bytes"></a><span class="ddoc_psymbol">bytes</span>;
|
||||
</dt>
|
||||
<dd>The input bytes.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.index"></a>size_t <a name="index"></a><span class="ddoc_psymbol">index</span>;
|
||||
</dt>
|
||||
<dd>The range's current position.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.column"></a>size_t <a name="column"></a><span class="ddoc_psymbol">column</span>;
|
||||
</dt>
|
||||
<dd>The current column number.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".LexerRange.line"></a>size_t <a name="line"></a><span class="ddoc_psymbol">line</span>;
|
||||
</dt>
|
||||
<dd>The current line number.<p></p>
|
||||
|
||||
</dd>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache"></a>struct <a name="StringCache"></a><span class="ddoc_psymbol">StringCache</span>;
|
||||
</dt>
|
||||
<dd>The string cache implements a map/set for strings. Placing a string in the
|
||||
cache returns an identifier that can be used to instantly access the stored
|
||||
string. It is then possible to simply compare these indexes instead of
|
||||
performing full string comparisons when comparing the string content of
|
||||
dynamic tokens. The string cache also handles its own memory, so that mutable
|
||||
ubyte[] to lexers can still have immutable string fields in their tokens.
|
||||
Because the string cache also performs de-duplication it is possible to
|
||||
drastically reduce the memory usage of a lexer.<p></p>
|
||||
|
||||
<dl><dt class="d_decl"><a name=".StringCache.this"></a> this(size_t <i>bucketCount</i>);
|
||||
</dt>
|
||||
<dd><b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>bucketCount</i></td>
|
||||
<td valign=top>the initial number of buckets.</td></tr>
|
||||
</table><p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.cacheGet"></a>pure nothrow @safe string <a name="cacheGet"></a><span class="ddoc_psymbol">cacheGet</span>(const(ubyte[]) <i>bytes</i>);
|
||||
</dt>
|
||||
<dd>Equivalent to calling cache() and get().
|
||||
<pre class="d_code">StringCache cache;
|
||||
<span class="d_keyword">ubyte</span>[] str = ['a', 'b', 'c'];
|
||||
string s = cache.get(cache.cache(str));
|
||||
<span class="d_keyword">assert</span>(s == <span class="d_string">"abc"</span>);
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.cacheGet"></a>pure nothrow @safe string <a name="cacheGet"></a><span class="ddoc_psymbol">cacheGet</span>(const(ubyte[]) <i>bytes</i>, uint <i>hash</i>);
|
||||
</dt>
|
||||
<dd>Equivalent to calling cache() and get().<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.cache"></a>pure nothrow @safe size_t <a name="cache"></a><span class="ddoc_psymbol">cache</span>(const(ubyte)[] <i>bytes</i>);
|
||||
</dt>
|
||||
<dd>Caches a string.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>const(ubyte)[] <i>bytes</i></td>
|
||||
<td valign=top>the string to <a name="cache"></a><span class="ddoc_psymbol">cache</span></td></tr>
|
||||
</table><p></p>
|
||||
<b>Returns:</b><br>A key that can be used to retrieve the cached string
|
||||
<p></p>
|
||||
<b>Examples:</b><br><pre class="d_code">StringCache <span class="d_psymbol">cache</span>;
|
||||
<span class="d_keyword">ubyte</span>[] <span class="d_param">bytes</span> = ['a', 'b', 'c'];
|
||||
size_t first = <span class="d_psymbol">cache</span>.<span class="d_psymbol">cache</span>(<span class="d_param">bytes</span>);
|
||||
size_t second = <span class="d_psymbol">cache</span>.<span class="d_psymbol">cache</span>(<span class="d_param">bytes</span>);
|
||||
<span class="d_keyword">assert</span> (first == second);
|
||||
</pre>
|
||||
<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.cache"></a>pure nothrow @safe size_t <a name="cache"></a><span class="ddoc_psymbol">cache</span>(const(ubyte)[] <i>bytes</i>, uint <i>hash</i>);
|
||||
</dt>
|
||||
<dd>Caches a string as above, but uses the given hash code instead of
|
||||
calculating one itself. Using this alongside hashStep() can reduce the
|
||||
amount of work necessary when lexing dynamic tokens.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.get"></a>const pure nothrow @safe string <a name="get"></a><span class="ddoc_psymbol">get</span>(size_t <i>index</i>);
|
||||
</dt>
|
||||
<dd>Gets a cached string based on its key.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>size_t <i>index</i></td>
|
||||
<td valign=top>the key</td></tr>
|
||||
</table><p></p>
|
||||
<b>Returns:</b><br>the cached string<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.hashStep"></a>static pure nothrow @safe uint <a name="hashStep"></a><span class="ddoc_psymbol">hashStep</span>(ubyte <i>b</i>, uint <i>h</i>);
|
||||
</dt>
|
||||
<dd>Incremental hashing.
|
||||
<p></p>
|
||||
<b>Parameters:</b><table class=parms><tr><td valign=top>ubyte <i>b</i></td>
|
||||
<td valign=top>the byte to add to the hash</td></tr>
|
||||
<tr><td valign=top>uint <i>h</i></td>
|
||||
<td valign=top>the hash that has been calculated so far</td></tr>
|
||||
</table><p></p>
|
||||
<b>Returns:</b><br>the new hash code for the string.<p></p>
|
||||
|
||||
</dd>
|
||||
<dt class="d_decl"><a name=".StringCache.defaultBucketCount"></a>static int <a name="defaultBucketCount"></a><span class="ddoc_psymbol">defaultBucketCount</span>;
|
||||
</dt>
|
||||
<dd>The default bucket count for the string cache.<p></p>
|
||||
|
||||
</dd>
|
||||
</dl>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<table width=100%><tr><td><hr align="left" size="8" width="100%" color="maroon" /></td><td width=5%><a href=#top>[top]</a></td></tr></table>
|
Binary file not shown.
Loading…
Reference in New Issue