Updated lexer docs. Implemented delete and floating-point operator rules. Fixed a bug in AST traversal.

parent 2f78272fed
commit d13d680b74
analysis/del.d (new file)
@@ -0,0 +1,29 @@
+// Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.del;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+/**
+ * Checks for use of the deprecated "delete" keyword
+ */
+class DeleteCheck : BaseAnalyzer
+{
+	alias visit = BaseAnalyzer.visit;
+
+	this(string fileName)
+	{
+		super(fileName);
+	}
+
+	override void visit(DeleteExpression d)
+	{
+		addErrorMessage(d.line, d.column, "Avoid using the deprecated delete keyword");
+		d.accept(this);
+	}
+}
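Note (outside the diff): a minimal sketch of code the new DeleteCheck flags; the function and variable names are illustrative only.

	void disposeExample()
	{
		auto o = new Object;
		delete o; // reported: the delete keyword is deprecated; prefer destroy(o) and GC-managed memory
	}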
analysis/fish.d (new file)
@@ -0,0 +1,38 @@
+// Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.fish;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+/**
+ * Checks for use of the deprecated floating point comparison operators.
+ */
+class FloatOperatorCheck : BaseAnalyzer
+{
+	alias visit = BaseAnalyzer.visit;
+
+	this(string fileName)
+	{
+		super(fileName);
+	}
+
+	override void visit(RelExpression r)
+	{
+		if (r.operator == tok!"<>"
+			|| r.operator == tok!"!<>"
+			|| r.operator == tok!"!>"
+			|| r.operator == tok!"!<"
+			|| r.operator == tok!"!<>="
+			|| r.operator == tok!"!>="
+			|| r.operator == tok!"!<=")
+		{
+			addErrorMessage(r.line, r.column, "Avoid using the deprecated floating-point operators");
+		}
+		r.accept(this);
+	}
+}
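Note (outside the diff): a minimal sketch of code FloatOperatorCheck flags. The `<>`/`!<`-style operators are D's deprecated NaN-aware comparisons; the function name is illustrative.

	bool notLess(double a, double b)
	{
		return a !< b; // reported: deprecated floating-point operator; use !(a < b), or test isNaN explicitly
	}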
@@ -15,6 +15,8 @@ import analysis.base;
 import analysis.style;
 import analysis.enumarrayliteral;
 import analysis.pokemon;
+import analysis.del;
+import analysis.fish;
 
 void messageFunction(string fileName, size_t line, size_t column, string message,
 	bool isError)
@@ -63,8 +65,14 @@ void analyze(File output, string[] fileNames, bool staticAnalyze = true)
 		auto pokemon = new PokemonExceptionCheck(fileName);
 		pokemon.visit(m);
+
+		auto del = new DeleteCheck(fileName);
+		del.visit(m);
+
+		auto fish = new FloatOperatorCheck(fileName);
+		fish.visit(m);
 
 		foreach (message; sort(chain(enums.messages, style.messages,
-			pokemon.messages).array))
+			pokemon.messages, del.messages, fish.messages).array))
 		{
 			writeln(message);
 		}
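Note (outside the diff): the pattern above is how further checks plug in; a hypothetical additional check would follow the same steps — construct it with the file name, visit the module, then chain its messages into the sorted output.

	auto other = new SomeOtherCheck(fileName); // SomeOtherCheck is hypothetical
	other.visit(m);
	// ...and append other.messages to the chain(...) call above.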
main.d
@@ -109,10 +109,11 @@ int main(string[] args)
 		}
 		else if (tokenDump)
 		{
+			writeln("text blank\tindex\tline\tcolumn\tcomment");
 			foreach (token; tokens)
 			{
-				writeln("«", token.text is null ? str(token.type) : token.text,
-					"» ", token.text !is null, " ", token.index, " ", token.line, " ", token.column, " ",
+				writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
+					token.text !is null, token.index, token.line, token.column,
 					token.comment);
 			}
 			return 0;
stdx/d/ast.d
@@ -31,6 +31,58 @@ import std.string;
 abstract class ASTVisitor
 {
 public:
+
+	void visit(ExpressionNode n)
+	{
+		if (cast(AddExpression) n) visit(cast(AddExpression) n);
+		else if (cast(AndAndExpression) n) visit(cast(AndAndExpression) n);
+		else if (cast(AndExpression) n) visit(cast(AndExpression) n);
+		else if (cast(AsmAddExp) n) visit(cast(AsmAddExp) n);
+		else if (cast(AsmAndExp) n) visit(cast(AsmAndExp) n);
+		else if (cast(AsmEqualExp) n) visit(cast(AsmEqualExp) n);
+		else if (cast(AsmLogAndExp) n) visit(cast(AsmLogAndExp) n);
+		else if (cast(AsmLogOrExp) n) visit(cast(AsmLogOrExp) n);
+		else if (cast(AsmMulExp) n) visit(cast(AsmMulExp) n);
+		else if (cast(AsmOrExp) n) visit(cast(AsmOrExp) n);
+		else if (cast(AsmRelExp) n) visit(cast(AsmRelExp) n);
+		else if (cast(AsmShiftExp) n) visit(cast(AsmShiftExp) n);
+		else if (cast(AssertExpression) n) visit(cast(AssertExpression) n);
+		else if (cast(AssignExpression) n) visit(cast(AssignExpression) n);
+		else if (cast(CmpExpression) n) visit(cast(CmpExpression) n);
+		else if (cast(DeleteExpression) n) visit(cast(DeleteExpression) n);
+		else if (cast(EqualExpression) n) visit(cast(EqualExpression) n);
+		else if (cast(Expression) n) visit(cast(Expression) n);
+		else if (cast(FunctionCallExpression) n) visit(cast(FunctionCallExpression) n);
+		else if (cast(FunctionLiteralExpression) n) visit(cast(FunctionLiteralExpression) n);
+		else if (cast(IdentityExpression) n) visit(cast(IdentityExpression) n);
+		else if (cast(ImportExpression) n) visit(cast(ImportExpression) n);
+		else if (cast(IndexExpression) n) visit(cast(IndexExpression) n);
+		else if (cast(InExpression) n) visit(cast(InExpression) n);
+		else if (cast(IsExpression) n) visit(cast(IsExpression) n);
+		else if (cast(LambdaExpression) n) visit(cast(LambdaExpression) n);
+		else if (cast(MixinExpression) n) visit(cast(MixinExpression) n);
+		else if (cast(MulExpression) n) visit(cast(MulExpression) n);
+		else if (cast(NewAnonClassExpression) n) visit(cast(NewAnonClassExpression) n);
+		else if (cast(NewExpression) n) visit(cast(NewExpression) n);
+		else if (cast(OrExpression) n) visit(cast(OrExpression) n);
+		else if (cast(OrOrExpression) n) visit(cast(OrOrExpression) n);
+		else if (cast(PostIncDecExpression) n) visit(cast(PostIncDecExpression) n);
+		else if (cast(PowExpression) n) visit(cast(PowExpression) n);
+		else if (cast(PragmaExpression) n) visit(cast(PragmaExpression) n);
+		else if (cast(PreIncDecExpression) n) visit(cast(PreIncDecExpression) n);
+		else if (cast(PrimaryExpression) n) visit(cast(PrimaryExpression) n);
+		else if (cast(RelExpression) n) visit(cast(RelExpression) n);
+		else if (cast(ShiftExpression) n) visit(cast(ShiftExpression) n);
+		else if (cast(SliceExpression) n) visit(cast(SliceExpression) n);
+		else if (cast(TemplateMixinExpression) n) visit(cast(TemplateMixinExpression) n);
+		else if (cast(TernaryExpression) n) visit(cast(TernaryExpression) n);
+		else if (cast(TraitsExpression) n) visit(cast(TraitsExpression) n);
+		else if (cast(TypeidExpression) n) visit(cast(TypeidExpression) n);
+		else if (cast(TypeofExpression) n) visit(cast(TypeofExpression) n);
+		else if (cast(UnaryExpression) n) visit(cast(UnaryExpression) n);
+		else if (cast(XorExpression) n) visit(cast(XorExpression) n);
+	}
+
 	/** */ void visit(AddExpression addExpression) { addExpression.accept(this); }
 	/** */ void visit(AliasDeclaration aliasDeclaration) { aliasDeclaration.accept(this); }
 	/** */ void visit(AliasInitializer aliasInitializer) { aliasInitializer.accept(this); }
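Note (outside the diff): the left/right children of binary expressions are stored with the abstract ExpressionNode type, so a traversal that reached them through the old catch-all visit(ExpressionNode) never dispatched to a concrete overload such as visit(RelExpression). The new dispatcher downcasts and forwards, which appears to be the AST-traversal fix mentioned in the commit message. A sketch of the effect (the visitor name is illustrative):

	class MyVisitor : ASTVisitor
	{
		alias visit = ASTVisitor.visit;

		override void visit(RelExpression r)
		{
			// Now reached even when the node is referenced as an ExpressionNode.
		}
	}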
@@ -104,7 +156,6 @@ public:
 	/** */ void visit(EponymousTemplateDeclaration eponymousTemplateDeclaration) { eponymousTemplateDeclaration.accept(this); }
 	/** */ void visit(EqualExpression equalExpression) { equalExpression.accept(this); }
 	/** */ void visit(Expression expression) { expression.accept(this); }
-	/** */ void visit(ExpressionNode expressionNode) { expressionNode.accept(this); }
 	/** */ void visit(ExpressionStatement expressionStatement) { expressionStatement.accept(this); }
 	/** */ void visit(FinalSwitchStatement finalSwitchStatement) { finalSwitchStatement.accept(this); }
 	/** */ void visit(Finally finally_) { finally_.accept(this); }
@@ -234,10 +285,11 @@ public:
 
 interface ASTNode
 {
+public:
 	/** */ void accept(ASTVisitor visitor);
 }
 
-immutable string DEFAULT_ACCEPT = q{void accept(ASTVisitor visitor) {}};
+immutable string DEFAULT_ACCEPT = q{override void accept(ASTVisitor visitor) {}};
 
 template visitIfNotNull(fields ...)
 {
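Note (outside the diff): DEFAULT_ACCEPT is a string mixin; the q{} token string it holds now carries the override keyword, matching the hand-written accept() methods updated throughout the rest of this file. A node class with nothing of its own to visit would presumably use the constant like this (MyLeafNode is illustrative):

	class MyLeafNode : ASTNode
	{
	public:
		mixin (DEFAULT_ACCEPT); // expands to: override void accept(ASTVisitor visitor) {}
	}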
@@ -259,19 +311,28 @@ template visitIfNotNull(fields ...)
 	}
 }
 
-abstract class ExpressionNode : ASTNode {}
+abstract class ExpressionNode : ASTNode
+{
+public:
+	override void accept(ASTVisitor visitor)
+	{
+		assert (false);
+	}
+}
 
 mixin template BinaryExpressionBody()
 {
 	ExpressionNode left;
 	ExpressionNode right;
+	size_t line;
+	size_t column;
 }
 
 ///
 class AddExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -283,7 +344,7 @@ public:
 class AliasDeclaration : ASTNode
 {
 public:
-	void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(type, name, initializers));
 	}
@@ -332,7 +393,7 @@ public:
 class AndAndExpression : ExpressionNode
 {
 public:
-	void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -343,7 +404,7 @@ public:
 class AndExpression : ExpressionNode
 {
 public:
-	void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -566,7 +627,7 @@ public:
 class AssertExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(assertion, message));
 	}
@@ -578,7 +639,7 @@ public:
 class AssignExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(ternaryExpression, assignExpression));
 	}
@@ -816,7 +877,7 @@ public:
 class CmpExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(shiftExpression, equalExpression,
 			identityExpression, relExpression, inExpression));
@@ -1031,11 +1092,13 @@ public:
 class DeleteExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression));
 	}
 	/** */ UnaryExpression unaryExpression;
+	/** */ size_t line;
+	/** */ size_t column;
 }
 
 ///
@@ -1151,7 +1214,7 @@ public:
 class EqualExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1163,7 +1226,7 @@ public:
 class Expression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(items));
 	}
@@ -1293,7 +1356,7 @@ public:
 class FunctionCallExpression : ExpressionNode
 {
 public:
-	void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression, arguments, templateArguments));
 	}
@@ -1306,7 +1369,7 @@ public:
 class FunctionCallStatement : ASTNode
 {
 public:
-	void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(functionCallExpression));
 	}
@@ -1338,7 +1401,7 @@ public:
 class FunctionLiteralExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(type, parameters, functionAttributes,
 			functionBody));
@@ -1413,7 +1476,7 @@ public:
 class IdentityExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1478,7 +1541,7 @@ public:
 class ImportExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(assignExpression));
 	}
@@ -1489,7 +1552,7 @@ public:
 class IndexExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression, argumentList));
 	}
@@ -1501,7 +1564,7 @@ public:
 class InExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1575,7 +1638,7 @@ public:
 class IsExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(type, identifier, typeSpecialization,
 			templateParameterList));
@@ -1626,7 +1689,7 @@ public:
 class LambdaExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(identifier, parameters, functionAttributes,
 			assignExpression));
@@ -1689,7 +1752,7 @@ public:
 class MixinExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(assignExpression));
 	}
@@ -1748,7 +1811,7 @@ public:
 class MulExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1760,7 +1823,7 @@ public:
 class NewAnonClassExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(allocatorArguments, constructorArguments,
 			baseClassList, structBody));
@@ -1775,7 +1838,7 @@ public:
 class NewExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(newAnonClassExpression, type, arguments,
 			assignExpression));
@@ -1863,7 +1926,7 @@ public:
 class OrExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1874,7 +1937,7 @@ public:
 class OrOrExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1937,7 +2000,7 @@ public:
 class PostIncDecExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression));
 	}
@@ -1949,7 +2012,7 @@ public:
 class PowExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -1971,7 +2034,7 @@ public:
 class PragmaExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(identifier, argumentList));
 	}
@@ -1983,7 +2046,7 @@ public:
 class PreIncDecExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression));
 	}
@@ -1995,7 +2058,7 @@ public:
 class PrimaryExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(basicType, primary, typeofExpression,
 			typeidExpression, arrayLiteral, assocArrayLiteral, expression,
@@ -2035,7 +2098,7 @@ public:
 class RelExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -2096,7 +2159,7 @@ public:
 class ShiftExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
@@ -2120,7 +2183,7 @@ public:
 class SliceExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(unaryExpression, lower, upper));
 	}
@@ -2409,7 +2472,7 @@ public:
 class TemplateMixinExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(identifier, templateArguments, mixinTemplateName));
 	}
@@ -2534,7 +2597,7 @@ public:
 class TernaryExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(orOrExpression, expression, ternaryExpression));
 	}
@@ -2558,7 +2621,7 @@ public:
 class TraitsExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(identifier, templateArgumentList));
 	}
@@ -2647,7 +2710,7 @@ public:
 class TypeidExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(type, expression));
 	}
@@ -2659,7 +2722,7 @@ public:
 class TypeofExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(expression, return_));
 	}
@@ -2671,7 +2734,7 @@ public:
 class UnaryExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		// TODO prefix, postfix, unary
 		mixin (visitIfNotNull!(primaryExpression, newExpression,
@@ -2803,7 +2866,7 @@ public:
 class XorExpression : ExpressionNode
 {
 public:
-	/+override+/ void accept(ASTVisitor visitor)
+	override void accept(ASTVisitor visitor)
 	{
 		mixin (visitIfNotNull!(left, right));
 	}
stdx/d/parser.d
@@ -1874,6 +1874,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
 {
 	mixin(traceEnterAndExit!(__FUNCTION__));
 	auto node = new DeleteExpression;
+	node.line = current.line;
+	node.column = current.column;
 	if (expect(tok!"delete") is null) return null;
 	node.unaryExpression = parseUnaryExpression();
 	return node;
@@ -3990,6 +3992,7 @@ q{(int a, ...)
 */
 PragmaDeclaration parsePragmaDeclaration()
 {
+	mixin (traceEnterAndExit!(__FUNCTION__));
 	auto node = new PragmaDeclaration;
 	node.pragmaExpression = parsePragmaExpression();
 	expect(tok!";");
@@ -4005,6 +4008,7 @@ q{(int a, ...)
 */
 PragmaExpression parsePragmaExpression()
 {
+	mixin (traceEnterAndExit!(__FUNCTION__));
 	auto node = new PragmaExpression;
 	expect(tok!"pragma");
 	expect(tok!"(");
@@ -4264,8 +4268,9 @@ q{(int a, ...)
 * | $(LITERAL '!<=')
 * ;)
 */
-ExpressionNode parseRelExpression(ExpressionNode shift = null)
+ExpressionNode parseRelExpression(ExpressionNode shift)
 {
+	mixin (traceEnterAndExit!(__FUNCTION__));
 	return parseLeftAssocBinaryExpression!(RelExpression, ShiftExpression,
 		tok!"<", tok!"<=", tok!">", tok!">=", tok!"!<>=", tok!"!<>",
 		tok!"<>", tok!"<>=", tok!"!>", tok!"!>=", tok!"!>=", tok!"!<",
@@ -6238,7 +6243,11 @@ protected:
 		{
 			auto n = new ExpressionType;
 			static if (__traits(hasMember, ExpressionType, "operator"))
+			{
+				n.line = current.line;
+				n.column = current.column;
 				n.operator = advance().type;
+			}
 			else
 				advance();
 			n.left = node;
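Note (outside the diff): the static if above is ordinary D compile-time introspection — line, column, and operator are only assigned for node types that actually declare an operator field (RelExpression, for example). A self-contained illustration of the __traits(hasMember, ...) test:

	struct S { int operator; }
	static assert (__traits(hasMember, S, "operator"));
	static assert (!__traits(hasMember, S, "line"));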
stdx/lexer.d
@@ -1,8 +1,99 @@
 // Written in the D programming language
 
 /**
+ * $(H2 Summary)
 * This module contains a range-based _lexer generator.
 *
+ * $(H2 Overview)
+ * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
+ * several helper templates for generating such things as token identifiers.
+ *
+ * To write a _lexer using this API:
+ * $(OL
+ *     $(LI Create the string array constants for your language.
+ *         $(UL
+ *             $(LI $(LINK2 #.StringConstants, String Constants))
+ *         ))
+ *     $(LI Create aliases for the various token and token identifier types
+ *         specific to your language.
+ *         $(UL
+ *             $(LI $(LREF TokenIdType))
+ *             $(LI $(LREF tokenStringRepresentation))
+ *             $(LI $(LREF TokenStructure))
+ *             $(LI $(LREF TokenId))
+ *         ))
+ *     $(LI Create a struct that mixes in the Lexer template mixin and
+ *         implements the necessary functions.
+ *         $(UL
+ *             $(LI $(LREF Lexer))
+ *         ))
+ * )
+ * Examples:
+ * $(UL
+ * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
+ * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
+ * )
+ * $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
+ * $(DL
+ * $(DT $(B staticTokens))
+ * $(DD A listing of the tokens whose exact value never changes and which cannot
+ *     possibly be a token handled by the default token lexing function. The
+ *     most common example of this kind of token is an operator such as
+ *     $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
+ * $(DT $(B dynamicTokens))
+ * $(DD A listing of tokens whose value is variable, such as whitespace,
+ *     identifiers, number literals, and string literals.)
+ * $(DT $(B possibleDefaultTokens))
+ * $(DD A listing of tokens that could possibly be one of the tokens handled by
+ *     the default token handling function. A common example of this is
+ *     a keyword such as $(D_STRING "for"), which looks like the beginning of
+ *     the identifier $(D_STRING "fortunate"). isSeparating is called to
+ *     determine if the character after the $(D_STRING 'r') separates the
+ *     identifier, indicating that the token is $(D_STRING "for"), or if lexing
+ *     should be turned over to the defaultTokenFunction.)
+ * $(DT $(B tokenHandlers))
+ * $(DD A mapping of prefixes to custom token handling function names. The
+ *     generated _lexer will search for the even-index elements of this array,
+ *     and then call the function whose name is the element immediately after the
+ *     even-indexed element. This is used for lexing complex tokens whose prefix
+ *     is fixed.)
+ * )
+ *
+ * Here are some example constants for a simple calculator _lexer:
+ * ---
+ * // There are a near infinite number of valid number literals, so numbers are
+ * // dynamic tokens.
+ * enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
+ *
+ * // The operators are always the same, and cannot start a numberLiteral, so
+ * // they are staticTokens
+ * enum string[] staticTokens = ["-", "+", "*", "/"];
+ *
+ * // In this simple example there are no keywords or other tokens that could
+ * // look like dynamic tokens, so this is blank.
+ * enum string[] possibleDefaultTokens = [];
+ *
+ * // If any whitespace character or digit is encountered, pass lexing over to
+ * // our custom handler functions. These will be demonstrated in an example
+ * // later on.
+ * enum string[] tokenHandlers = [
+ *     "0", "lexNumber",
+ *     "1", "lexNumber",
+ *     "2", "lexNumber",
+ *     "3", "lexNumber",
+ *     "4", "lexNumber",
+ *     "5", "lexNumber",
+ *     "6", "lexNumber",
+ *     "7", "lexNumber",
+ *     "8", "lexNumber",
+ *     "9", "lexNumber",
+ *     " ", "lexWhitespace",
+ *     "\n", "lexWhitespace",
+ *     "\t", "lexWhitespace",
+ *     "\r", "lexWhitespace"
+ * ];
+ * ---
+ *
 * Copyright: Brian Schott 2013
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
 * Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
@@ -16,7 +107,12 @@ module stdx.lexer;
 * unsigned integral type that is able to hold the value
 * staticTokens.length + dynamicTokens.length. For example if there are 20
 * static tokens, 30 dynamic tokens, and 10 possible default tokens, this
- * template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.
+ * template will alias itself to ubyte, as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
+ * Examples:
+ * ---
+ * // In our calculator example this means that IdType is an alias for ubyte.
+ * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
+ * ---
 */
 template TokenIdType(alias staticTokens, alias dynamicTokens,
 	alias possibleDefaultTokens)
@@ -32,7 +128,15 @@ template TokenIdType(alias staticTokens, alias dynamicTokens,
 }
 
 /**
- * Looks up the string representation of the given token type.
+ * Looks up the string representation of the given token type. This is the
+ * opposite of the function of the TokenId template.
+ * Params: type = the token type identifier
+ * Examples:
+ * ---
+ * alias str = tokenStringRepresentation(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
+ * assert (str(tok!"*") == "*");
+ * ---
+ * See_also: $(LREF TokenId)
 */
 string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property
 {
@@ -57,18 +161,18 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
 *     valid token type identifier)
 * )
 * In all cases this template will alias itself to a constant of type IdType.
+ * This template will fail at compile time if $(D_PARAM symbol) is not one of
+ * the staticTokens, dynamicTokens, or possibleDefaultTokens.
 * Examples:
 * ---
- * enum string[] staticTokens = ["+", "-", "*", "/"];
- * enum string[] dynamicTokens = ["number"];
- * enum string[] possibleDefaultTokens = [];
- * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
 * template tok(string symbol)
 * {
 *     alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
 *         possibleDefaultTokens, symbol);
 * }
+ * // num and plus are of type ubyte.
 * IdType plus = tok!"+";
+ * IdType num = tok!"numberLiteral";
 * ---
 */
 template TokenId(IdType, alias staticTokens, alias dynamicTokens,
@@ -118,35 +222,49 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens,
 /**
 * The token that is returned by the lexer.
 * Params:
- *     IDType = The D type of the "type" token type field.
+ *     IdType = The D type of the "type" token type field.
 *     extraFields = A string containing D code for any extra fields that should
 *                   be included in the token structure body. This string is passed
 *                   directly to a mixin statement.
+ * Examples:
+ * ---
+ * // No extra struct fields are desired in this example, so leave it blank.
+ * alias Token = TokenStructure!(IdType, "");
+ * Token minusToken = Token(tok!"-");
+ * ---
 */
-struct TokenStructure(IDType, string extraFields = "")
+struct TokenStructure(IdType, string extraFields = "")
 {
 public:
 
 	/**
	 * == overload for the token type.
 	 */
-	bool opEquals(IDType type) const pure nothrow @safe
+	bool opEquals(IdType type) const pure nothrow @safe
 	{
 		return this.type == type;
 	}
 
 	/**
-	 *
+	 * Constructs a token from a token type.
+	 * Params: type = the token type
 	 */
-	this(IDType type)
+	this(IdType type)
 	{
 		this.type = type;
 	}
 
 	/**
-	 *
+	 * Constructs a token.
+	 * Params:
+	 *     type = the token type
+	 *     text = the text of the token, which may be null
+	 *     line = the line number at which this token occurs
+	 *     column = the column number at which this token occurs
+	 *     index = the byte offset from the beginning of the input at which this
+	 *         token occurs
 	 */
-	this(IDType type, string text, size_t line, size_t column, size_t index)
+	this(IdType type, string text, size_t line, size_t column, size_t index)
 	{
 		this.text = text;
 		this.line = line;
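Note (outside the diff): extraFields is documented above but not exemplified. Since the string is mixed straight into the struct body, a token type carrying an extra field — such as the comment field that main.d prints — would presumably be declared like this:

	alias Token = TokenStructure!(IdType, "string comment;");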
@@ -156,39 +274,105 @@ public:
 	}
 
 	/**
-	 *
+	 * The _text of the token.
 	 */
 	string text;
 
 	/**
-	 *
+	 * The line number at which this token occurs.
 	 */
 	size_t line;
 
 	/**
-	 *
+	 * The column number at which this token occurs.
 	 */
 	size_t column;
 
 	/**
-	 *
+	 * The byte offset from the beginning of the input at which this token
+	 * occurs.
 	 */
 	size_t index;
 
 	/**
-	 *
+	 * The token type.
 	 */
-	IDType type;
+	IdType type;
 
 	mixin (extraFields);
 }
 
+/**
+ * The implementation of the _lexer is contained within this mixin template.
+ * To use it, this template should be mixed in to a struct that represents the
+ * _lexer for your language. This struct should implement the following methods:
+ * $(UL
+ *     $(LI popFront, which should call this mixin's _popFront() and
+ *         additionally perform any token filtering or shuffling you deem
+ *         necessary. For example, you can implement popFront to skip comment or
+ *         whitespace tokens.)
+ *     $(LI A function that serves as the default token lexing function. For
+ *         most languages this will be the identifier lexing function.)
+ *     $(LI A function that is able to determine if an identifier/keyword has
+ *         come to an end. This function must return $(D_KEYWORD bool) and take
+ *         a single $(D_KEYWORD size_t) argument representing the number of
+ *         bytes to skip over before looking for a separating character.)
+ *     $(LI Any functions referred to in the tokenHandlers template parameter.
+ *         These functions must be marked $(D_KEYWORD pure nothrow), take no
+ *         arguments, and return a token)
+ *     $(LI A constructor that initializes the range field as well as calls
+ *         popFront() exactly once (to initialize the _front field).)
+ * )
+ * Examples:
+ * ---
+ * struct CalculatorLexer
+ * {
+ *     mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
+ *         staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
+ *
+ *     this (ubyte[] bytes)
+ *     {
+ *         this.range = LexerRange(bytes);
+ *         popFront();
+ *     }
+ *
+ *     void popFront() pure
+ *     {
+ *         _popFront();
+ *     }
+ *
+ *     Token lexNumber() pure nothrow @safe
+ *     {
+ *         ...
+ *     }
+ *
+ *     Token lexWhitespace() pure nothrow @safe
+ *     {
+ *         ...
+ *     }
+ *
+ *     Token defaultTokenFunction() pure nothrow @safe
+ *     {
+ *         // There is no default token in the example calculator language, so
+ *         // this is always an error.
+ *         range.popFront();
+ *         return Token(tok!"");
+ *     }
+ *
+ *     bool isSeparating(size_t offset) pure nothrow @safe
+ *     {
+ *         // For this example language, always return true.
+ *         return true;
+ *     }
+ * }
+ * ---
+ */
 mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 	alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
-	alias pseudoTokenHandlers, alias possibleDefaultTokens)
+	alias tokenHandlers, alias possibleDefaultTokens)
 {
 
-	static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must"
+	static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
 		~ " have a corresponding handler function name.");
 
 	static string generateMask(const ubyte[] arr)
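Note (outside the diff): the CalculatorLexer example leaves lexNumber and lexWhitespace as stubs. Using only the LexerRange and TokenStructure APIs added elsewhere in this file, one plausible (untested) sketch of lexNumber looks like this; lexWhitespace would be analogous, calling range.incrementLine() when it consumes a newline.

	Token lexNumber() pure nothrow
	{
		auto mark = range.mark();
		auto line = range.line;
		auto column = range.column;
		auto index = range.index;
		while (!range.empty && range.front >= '0' && range.front <= '9')
			range.popFront();
		// A full lexer would typically run the slice through a StringCache;
		// a plain cast keeps the sketch short.
		return Token(tok!"numberLiteral", cast(string) range.slice(mark),
			line, column, index);
	}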
@@ -214,7 +398,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		import std.string;
 		import std.range;
 
-		string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2));
+		string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2));
 		string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
 		string code;
 		for (size_t i = 0; i < allTokens.length; i++)
@@ -240,7 +424,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		if (pseudoTokens.countUntil(tokens[0]) >= 0)
 		{
 			return " return "
-				~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+				~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
 				~ "();\n";
 		}
 		else if (staticTokens.countUntil(tokens[0]) >= 0)
@@ -251,7 +435,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		else if (pseudoTokens.countUntil(tokens[0]) >= 0)
 		{
 			return " return "
-				~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+				~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
 				~ "();\n";
 		}
 	}
@@ -271,14 +455,14 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 			if (token.length <= 8)
 			{
 				code ~= " return "
-					~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
+					~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
 					~ "();\n";
 			}
 			else
 			{
 				code ~= " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n";
 				code ~= " return "
-					~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
+					~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
 					~ "();\n";
 			}
 		}
@@ -325,16 +509,23 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		return code;
 	}
 
+	/**
+	 * Implements the range primitive front().
+	 */
 	ref const(Token) front() pure nothrow const @property
 	{
 		return _front;
 	}
 
+
 	void _popFront() pure
 	{
 		_front = advance();
 	}
 
+	/**
+	 * Implements the range primitive empty().
+	 */
 	bool empty() pure const nothrow @property
 	{
 		return _front.type == tok!"\0";
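Note (outside the diff): front(), empty(), and the popFront() that the mixing-in struct supplies make the generated lexer an input range, so the token stream composes with std.range/std.algorithm and with foreach. A hedged usage sketch in terms of the CalculatorLexer example, where str is the tokenStringRepresentation alias:

	import std.stdio : writeln;
	foreach (token; CalculatorLexer(cast(ubyte[]) "1 + 2"))
		writeln(str(token.type), " at column ", token.column);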
@@ -359,9 +550,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		return retVal;
 	}
 
-	/**
-	 * This only exists because the real array() can't be called at compile-time
-	 */
+	// This only exists because the real array() can't be called at compile-time
 	static string[] stupidToArray(R)(R range)
 	{
 		string[] retVal;
@@ -397,13 +586,30 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		}
 	}
 
+	/**
+	 * The lexer input.
+	 */
 	LexerRange range;
+
+	/**
+	 * The token that is currently at the front of the range.
+	 */
 	Token _front;
 }
 
+/**
+ * Range structure that wraps the _lexer's input.
+ */
 struct LexerRange
 {
 
+	/**
+	 * Params:
+	 *     bytes = the _lexer input
+	 *     index = the initial offset from the beginning of $(D_PARAM bytes)
+	 *     column = the initial column number
+	 *     line = the initial line number
+	 */
 	this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
 	{
 		this.bytes = bytes;
@@ -412,31 +618,52 @@ struct LexerRange
 		this.line = line;
 	}
 
+	/**
+	 * Returns: a mark at the current position that can then be used with slice.
+	 */
 	size_t mark() const nothrow pure @safe
 	{
 		return index;
 	}
 
+	/**
+	 * Sets the range to the given position.
+	 * Params: m = the position to seek to
+	 */
 	void seek(size_t m) nothrow pure @safe
 	{
 		index = m;
 	}
 
+	/**
+	 * Returns a slice of the input byte array between the given mark and the
+	 * current position.
+	 * Params: m = the beginning index of the slice to return
+	 */
 	const(ubyte)[] slice(size_t m) const nothrow pure @safe
 	{
 		return bytes[m .. index];
 	}
 
+	/**
+	 * Implements the range primitive _empty.
+	 */
 	bool empty() const nothrow pure @safe
 	{
 		return index >= bytes.length;
 	}
 
+	/**
+	 * Implements the range primitive _front.
+	 */
 	ubyte front() const nothrow pure @safe
 	{
 		return bytes[index];
 	}
 
+	/**
+	 * Returns: the current item as well as the items $(D_PARAM p) items ahead.
+	 */
 	const(ubyte)[] peek(size_t p) const nothrow pure @safe
 	{
 		return index + p + 1 > bytes.length
@@ -444,48 +671,79 @@ struct LexerRange
 			: bytes[index .. index + p + 1];
 	}
 
+	/**
+	 *
+	 */
 	ubyte peekAt(size_t offset) const nothrow pure @safe
 	{
 		return bytes[index + offset];
 	}
 
+	/**
+	 * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead.
+	 */
 	bool canPeek(size_t p) const nothrow pure @safe
 	{
 		return index + p < bytes.length;
 	}
 
+	/**
+	 * Implements the range primitive _popFront.
+	 */
 	void popFront() pure nothrow @safe
 	{
 		index++;
 		column++;
 	}
 
+	/**
+	 * Implements the algorithm _popFrontN more efficiently.
+	 */
 	void popFrontN(size_t n) pure nothrow @safe
 	{
 		index += n;
+		column += n;
 	}
 
+	/**
+	 * Increments the range's line number and resets the column counter.
+	 */
 	void incrementLine() pure nothrow @safe
 	{
 		column = 1;
 		line++;
 	}
 
+	/**
+	 * The input _bytes.
+	 */
 	const(ubyte)[] bytes;
 
+	/**
+	 * The range's current position.
+	 */
 	size_t index;
 
+	/**
+	 * The current _column number.
+	 */
 	size_t column;
 
+	/**
+	 * The current _line number.
+	 */
 	size_t line;
 }
 
 /**
- * The string cache should be used within lexer implementations for several
- * reasons:
- * $(UL
- * $(LI Reducing memory consumption.)
- * $(LI Increasing performance in token comparisons)
- * $(LI Correctly creating immutable token text if the lexing source is not
- * immutable)
- * )
+ * The string cache implements a map/set for strings. Placing a string in the
+ * cache returns an identifier that can be used to instantly access the stored
+ * string. It is then possible to simply compare these indexes instead of
+ * performing full string comparisons when comparing the string content of
+ * dynamic tokens. The string cache also handles its own memory, so that mutable
+ * ubyte[] input to lexers can still have immutable string fields in their tokens.
+ * Because the string cache also performs de-duplication it is possible to
+ * drastically reduce the memory usage of a lexer.
 */
 struct StringCache
 {
@ -493,7 +751,10 @@ public:
|
||||||
|
|
||||||
@disable this();
|
@disable this();
|
||||||
|
|
||||||
this(size_t bucketCount = defaultBucketCount)
|
/**
|
||||||
|
* Params: bucketCount = the initial number of buckets.
|
||||||
|
*/
|
||||||
|
this(size_t bucketCount)
|
||||||
{
|
{
|
||||||
buckets = new Item*[bucketCount];
|
buckets = new Item*[bucketCount];
|
||||||
}
|
}
|
@@ -512,6 +773,9 @@ public:
         return get(cache(bytes));
     }
 
+    /**
+     * Equivalent to calling cache() and get().
+     */
     string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe
     {
         return get(cache(bytes, hash));
@@ -536,6 +800,11 @@ public:
         return cache(bytes, hash);
     }
 
+    /**
+     * Caches a string as above, but uses the given hash code instead of
+     * calculating one itself. Using this alongside hashStep() can reduce the
+     * amount of work necessary when lexing dynamic tokens.
+     */
     size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
     in
     {
@@ -583,11 +852,21 @@ public:
         writeln("rehashes: ", rehashCount);
     }
 
+    /**
+     * Incremental hashing.
+     * Params:
+     * b = the byte to add to the hash
+     * h = the hash that has been calculated so far
+     * Returns: the new hash code for the string.
+     */
     static uint hashStep(ubyte b, uint h) pure nothrow @safe
     {
         return (h ^ sbox[b]) * 3;
     }
 
+    /**
+     * The default bucket count for the string cache.
+     */
     static enum defaultBucketCount = 2048;
 
 private:
 
@@ -0,0 +1,483 @@
<h1>stdx.lexer</h1> <!-- Generated by Ddoc from lexer.d -->
This module contains a range-based lexer generator.
<p></p>
The lexer generator consists of a template mixin, Lexer, along with several
helper templates for generating such things as token identifiers.
<p></p>

To generate a lexer using this API, several constants must be supplied:
<dl><dt>staticTokens</dt>
<dd>A listing of the tokens whose exact value never changes and which cannot
possibly be a token handled by the default token lexing function. The most
common example of this kind of token is an operator such as "*", or "-" in a
programming language.</dd>
<dt>dynamicTokens</dt>
<dd>A listing of tokens whose value is variable, such as whitespace,
identifiers, number literals, and string literals.</dd>
<dt>possibleDefaultTokens</dt>
<dd>A listing of tokens that could possibly be one of the tokens handled by
the default token handling function. A common example of this is a keyword
such as "for", which looks like the beginning of the identifier "fortunate".
isSeparating is called to determine if the character after the 'r' separates
the identifier, indicating that the token is "for", or if lexing should be
turned over to the defaultTokenFunction.</dd>
<dt>tokenHandlers</dt>
<dd>A mapping of prefixes to custom token handling function names. The
generated lexer will search for the even-index elements of this array, and
then call the function whose name is the element immediately after the
even-indexed element. This is used for lexing complex tokens whose prefix is
fixed.</dd>
</dl>
<p></p>

Here are some example constants for a simple calculator lexer:
<pre class="d_code">// There are a near infinite number of valid number literals, so numbers are
// dynamic tokens.
enum string[] dynamicTokens = ["numberLiteral", "whitespace"];

// The operators are always the same, and cannot start a numberLiteral, so
// they are staticTokens
enum string[] staticTokens = ["-", "+", "*", "/"];

// In this simple example there are no keywords or other tokens that could
// look like dynamic tokens, so this is blank.
enum string[] possibleDefaultTokens = [];

// If any whitespace character or digit is encountered, pass lexing over to
// our custom handler functions. These will be demonstrated in an example
// later on.
enum string[] tokenHandlers = [
    "0", "lexNumber",
    "1", "lexNumber",
    "2", "lexNumber",
    "3", "lexNumber",
    "4", "lexNumber",
    "5", "lexNumber",
    "6", "lexNumber",
    "7", "lexNumber",
    "8", "lexNumber",
    "9", "lexNumber",
    " ", "lexWhitespace",
    "\n", "lexWhitespace",
    "\t", "lexWhitespace",
    "\r", "lexWhitespace"
];
</pre>
<p></p>
<b>Examples:</b><br><ul><li>A lexer for D is available <a href="https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d">here</a>.</li>
<li>A lexer for Lua is available <a href="https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d">here</a>.</li>
</ul>
<p></p>
<b>License:</b><br><a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>
<p></p>
<b>Authors:</b><br>Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
<p></p>
<b>Source:</b><br><a href="https://github.com/D-Programming-Language/phobos/blob/master/std/lexer.d">std/lexer.d</a>
<p></p>

<dl><dt class="d_decl">template TokenIdType(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)</dt>
<dd>Template for determining the type used for a token type. Selects the smallest
unsigned integral type that is able to hold the value
staticTokens.length + dynamicTokens.length. For example if there are 20
static tokens, 30 dynamic tokens, and 10 possible default tokens, this
template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.
<p></p>
<b>Examples:</b><br><pre class="d_code">// In our calculator example this means that IdType is an alias for ubyte.
alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
</pre>
</dd>

<dt class="d_decl">@property string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type);</dt>
<dd>Looks up the string representation of the given token type. This is the
opposite of the function of the TokenId template.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>IdType type</td><td>the token type identifier</td></tr>
</table>
<b>Examples:</b><br><pre class="d_code">alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
assert (str(tok!"*") == "*");
</pre>
<b>See Also:</b><br>TokenId
</dd>

<dt class="d_decl">template TokenId(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens, string symbol)</dt>
<dd>Generates the token type identifier for the given symbol. There are two
special cases:
<ul><li>If symbol is "", then the token identifier will be 0</li>
<li>If symbol is "\0", then the token identifier will be the maximum
valid token type identifier</li>
</ul>
In all cases this template will alias itself to a constant of type IdType.
This template will fail at compile time if symbol is not one of
the staticTokens, dynamicTokens, or possibleDefaultTokens.
<p></p>
<b>Examples:</b><br><pre class="d_code">template tok(string symbol)
{
    alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
        possibleDefaultTokens, symbol);
}
// num and plus are of type ubyte.
IdType plus = tok!"+";
IdType num = tok!"numberLiteral";
</pre>
</dd>

<dt class="d_decl">struct TokenStructure(IdType, string extraFields = "");</dt>
<dd>The token that is returned by the lexer.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>IdType</td><td>The D type of the "type" token type field.</td></tr>
<tr><td>extraFields</td><td>A string containing D code for any extra fields that should
be included in the token structure body. This string is passed directly to a
mixin statement.</td></tr>
</table>
<b>Examples:</b><br><pre class="d_code">// No extra struct fields are desired in this example, so leave it blank.
alias Token = TokenStructure!(IdType, "");
Token minusToken = Token(tok!"-");
</pre>
<p></p>
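For illustration only (this sketch is not part of the original documentation), a non-empty
extraFields string can declare additional token members; the endIndex field name below is
hypothetical:
<pre class="d_code">// Sketch: mix an extra field declaration into every token via extraFields.
alias TokenWithEnd = TokenStructure!(IdType, "size_t endIndex;");

TokenWithEnd t = TokenWithEnd(tok!"numberLiteral");
t.endIndex = 42; // hypothetical: set by the lexer after the token has been read
</pre>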
<dl><dt class="d_decl">const pure nothrow @safe bool opEquals(IdType type);</dt>
<dd>== overload for the token type.</dd>
<dt class="d_decl">this(IdType type);</dt>
<dd>Constructs a token from a token type.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>IdType type</td><td>the token type</td></tr>
</table></dd>
<dt class="d_decl">this(IdType type, string text, size_t line, size_t column, size_t index);</dt>
<dd>Constructs a token.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>IdType type</td><td>the token type</td></tr>
<tr><td>string text</td><td>the text of the token, which may be null</td></tr>
<tr><td>size_t line</td><td>the line number at which this token occurs</td></tr>
<tr><td>size_t column</td><td>the column number at which this token occurs</td></tr>
<tr><td>size_t index</td><td>the byte offset from the beginning of the input at which this token occurs</td></tr>
</table></dd>
<dt class="d_decl">string text;</dt>
<dd>The text of the token.</dd>
<dt class="d_decl">size_t line;</dt>
<dd>The line number at which this token occurs.</dd>
<dt class="d_decl">size_t column;</dt>
<dd>The column number at which this token occurs.</dd>
<dt class="d_decl">size_t index;</dt>
<dd>The byte offset from the beginning of the input at which this token occurs.</dd>
<dt class="d_decl">IdType type;</dt>
<dd>The token type.</dd>
</dl>
</dd>
<dt class="d_decl">template Lexer(IDType, Token, alias defaultTokenFunction, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, alias tokenHandlers, alias possibleDefaultTokens)</dt>
<dd>The implementation of the lexer is contained within this mixin template.
To use it, this template should be mixed in to a struct that represents the
lexer for your language. This struct should implement the following methods:
<ul><li>popFront, which should call this mixin's _popFront() and
additionally perform any token filtering or shuffling you deem necessary.
For example, you can implement popFront to skip comment or whitespace
tokens (see the filtering sketch after the example below).</li>
<li>A function that serves as the default token lexing function. For
most languages this will be the identifier lexing function.</li>
<li>A function that is able to determine if an identifier/keyword has
come to an end. This function must return bool and take a single size_t
argument representing the number of bytes to skip over before looking for
a separating character.</li>
<li>Any functions referred to in the tokenHandlers template parameter.
These functions must be marked pure nothrow, take no arguments, and
return a token.</li>
<li>A constructor that initializes the range field as well as calls
popFront() exactly once (to initialize the front field).</li>
</ul>
<p></p>
<b>Examples:</b><br><pre class="d_code">struct CalculatorLexer
{
    mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
        staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);

    this (ubyte[] bytes)
    {
        this.range = LexerRange(bytes);
        popFront();
    }

    void popFront() pure
    {
        _popFront();
    }

    Token lexNumber() pure nothrow @safe
    {
        ...
    }

    Token lexWhitespace() pure nothrow @safe
    {
        ...
    }

    Token defaultTokenFunction() pure nothrow @safe
    {
        // There is no default token in the example calculator language, so
        // this is always an error.
        range.popFront();
        return Token(tok!"");
    }

    bool isSeparating(size_t offset) pure nothrow @safe
    {
        // For this example language, always return true.
        return true;
    }
}
</pre>
<p></p>
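As referenced in the list above, a filtering popFront might look like the following
sketch. It is not part of the original documentation; it assumes the calculator's
tok!"whitespace" identifier is in scope and that whitespace tokens should be dropped:
<pre class="d_code">// Sketch: drop whitespace tokens so consumers of the range never see them.
void popFront() pure
{
    do
        _popFront();
    while (!empty && front.type == tok!"whitespace");
}
</pre>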
<dl><dt class="d_decl">const pure nothrow @property const(Token) front();</dt>
<dd>Implements the range primitive front().</dd>
<dt class="d_decl">const pure nothrow @property bool empty();</dt>
<dd>Implements the range primitive empty().</dd>
<dt class="d_decl">LexerRange range;</dt>
<dd>The lexer input.</dd>
<dt class="d_decl">Token _front;</dt>
<dd>The token that is currently at the front of the range.</dd>
</dl>
</dd>

<dt class="d_decl">struct LexerRange;</dt>
<dd>Range structure that wraps the lexer's input.
<p></p>
<dl><dt class="d_decl">pure nothrow @safe this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1);</dt>
<dd><b>Parameters:</b><table class=parms><tr><td>const(ubyte)[] bytes</td><td>the lexer input</td></tr>
<tr><td>size_t index</td><td>the initial offset from the beginning of bytes</td></tr>
<tr><td>size_t column</td><td>the initial column number</td></tr>
<tr><td>size_t line</td><td>the initial line number</td></tr>
</table></dd>
<dt class="d_decl">const pure nothrow @safe size_t mark();</dt>
<dd><b>Returns:</b><br>a mark at the current position that can then be used with slice.</dd>
<dt class="d_decl">pure nothrow @safe void seek(size_t m);</dt>
<dd>Sets the range to the given position.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>size_t m</td><td>the position to seek to</td></tr>
</table></dd>
<dt class="d_decl">const pure nothrow @safe const(ubyte)[] slice(size_t m);</dt>
<dd>Returns a slice of the input byte array between the given mark and the
current position.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>size_t m</td><td>the beginning index of the slice to return</td></tr>
</table>
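For illustration only (not from the original documentation), mark, slice and seek are
typically combined as in the following sketch, which assumes a LexerRange named range
positioned at the start of a number literal:
<pre class="d_code">// Sketch: remember where the token started, consume its bytes, then slice them out.
auto m = range.mark();                 // remember the start of the token
while (!range.empty && range.front >= '0' && range.front <= '9')
    range.popFront();
const(ubyte)[] text = range.slice(m);  // the bytes that make up the literal
// range.seek(m);                      // or rewind to the mark instead
</pre>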
</dd>
<dt class="d_decl">const pure nothrow @safe bool empty();</dt>
<dd>Implements the range primitive empty.</dd>
<dt class="d_decl">const pure nothrow @safe ubyte front();</dt>
<dd>Implements the range primitive front.</dd>
<dt class="d_decl">const pure nothrow @safe const(ubyte)[] peek(size_t p);</dt>
<dd><b>Returns:</b><br>the current item as well as the items p items ahead.</dd>
<dt class="d_decl">const pure nothrow @safe ubyte peekAt(size_t offset);</dt>
<dd><b>Returns:</b><br>the byte at the given offset from the current position.</dd>
<dt class="d_decl">const pure nothrow @safe bool canPeek(size_t p);</dt>
<dd><b>Returns:</b><br>true if it is possible to peek p bytes ahead.
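For illustration only (not from the original documentation), canPeek and peekAt are
typically paired when looking ahead for multi-character tokens; the "+=" operator check
below is a hypothetical example:
<pre class="d_code">// Sketch: look one byte ahead without advancing the range.
if (range.canPeek(1) && range.peekAt(1) == '=')
{
    // the current byte starts a two-character operator such as "+="
}
</pre>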
</dd>
<dt class="d_decl">pure nothrow @safe void popFront();</dt>
<dd>Implements the range primitive popFront.</dd>
<dt class="d_decl">pure nothrow @safe void popFrontN(size_t n);</dt>
<dd>Implements the algorithm popFrontN more efficiently than calling popFront n times.</dd>
<dt class="d_decl">pure nothrow @safe void incrementLine();</dt>
<dd>Increments the range's line number and resets the column counter.</dd>
<dt class="d_decl">const(ubyte)[] bytes;</dt>
<dd>The input bytes.</dd>
<dt class="d_decl">size_t index;</dt>
<dd>The range's current position.</dd>
<dt class="d_decl">size_t column;</dt>
<dd>The current column number.</dd>
<dt class="d_decl">size_t line;</dt>
<dd>The current line number.</dd>
</dl>
</dd>

<dt class="d_decl">struct StringCache;</dt>
<dd>The string cache implements a map/set for strings. Placing a string in the
cache returns an identifier that can be used to instantly access the stored
string. It is then possible to simply compare these indexes instead of
performing full string comparisons when comparing the string content of
dynamic tokens. The string cache also handles its own memory, so that lexers
given mutable ubyte[] input can still have immutable string fields in their
tokens. Because the string cache also performs de-duplication, it is possible
to drastically reduce the memory usage of a lexer.
<p></p>
<dl><dt class="d_decl">this(size_t bucketCount);</dt>
<dd><b>Parameters:</b><table class=parms><tr><td>size_t bucketCount</td><td>the initial number of buckets.</td></tr>
</table></dd>
<dt class="d_decl">pure nothrow @safe string cacheGet(const(ubyte[]) bytes);</dt>
<dd>Equivalent to calling cache() and get().
<pre class="d_code">StringCache cache;
ubyte[] str = ['a', 'b', 'c'];
string s = cache.get(cache.cache(str));
assert(s == "abc");
</pre>
</dd>
<dt class="d_decl">pure nothrow @safe string cacheGet(const(ubyte[]) bytes, uint hash);</dt>
<dd>Equivalent to calling cache() and get().</dd>
<dt class="d_decl">pure nothrow @safe size_t cache(const(ubyte)[] bytes);</dt>
<dd>Caches a string.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>const(ubyte)[] bytes</td><td>the string to cache</td></tr>
</table>
<b>Returns:</b><br>A key that can be used to retrieve the cached string
<p></p>
<b>Examples:</b><br><pre class="d_code">StringCache cache;
ubyte[] bytes = ['a', 'b', 'c'];
size_t first = cache.cache(bytes);
size_t second = cache.cache(bytes);
assert (first == second);
</pre>
</dd>
<dt class="d_decl">pure nothrow @safe size_t cache(const(ubyte)[] bytes, uint hash);</dt>
<dd>Caches a string as above, but uses the given hash code instead of
calculating one itself. Using this alongside hashStep() can reduce the
amount of work necessary when lexing dynamic tokens.
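For illustration only (not from the original documentation), the following sketch shows
how hashStep() might be paired with this overload while lexing. It assumes a StringCache
instance named stringCache, a LexerRange named range, and a hash accumulator that starts
at 0 (the starting value is an assumption, not documented here):
<pre class="d_code">// Sketch: hash the token's bytes while consuming them, then cache with that hash.
uint hash = 0;               // assumed starting value for the accumulator
auto m = range.mark();
while (!range.empty && range.front >= '0' && range.front <= '9')
{
    hash = StringCache.hashStep(range.front, hash);
    range.popFront();
}
string text = stringCache.cacheGet(range.slice(m), hash);
</pre>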
</dd>
<dt class="d_decl">const pure nothrow @safe string get(size_t index);</dt>
<dd>Gets a cached string based on its key.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>size_t index</td><td>the key</td></tr>
</table>
<b>Returns:</b><br>the cached string</dd>
<dt class="d_decl">static pure nothrow @safe uint hashStep(ubyte b, uint h);</dt>
<dd>Incremental hashing.
<p></p>
<b>Parameters:</b><table class=parms><tr><td>ubyte b</td><td>the byte to add to the hash</td></tr>
<tr><td>uint h</td><td>the hash that has been calculated so far</td></tr>
</table>
<b>Returns:</b><br>the new hash code for the string.</dd>
<dt class="d_decl">static int defaultBucketCount;</dt>
<dd>The default bucket count for the string cache.</dd>
</dl>
</dd>
</dl>

<table width=100%><tr><td><hr align="left" size="8" width="100%" color="maroon" /></td><td width=5%><a href=#top>[top]</a></td></tr></table>