Updated DDoc, made template order more consistent.
parent c56716e096
commit 2eba33c1d3
@@ -399,8 +399,8 @@ public struct DLexer
 {
 import core.vararg;

- mixin Lexer!(Token, lexIdentifier, isSeparating, pseudoTokenHandlers,
- operators, dynamicTokens, keywords);
+ mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
+ keywords, pseudoTokenHandlers);

 this(ubyte[] range, const LexerConfig config, StringCache* cache)
 {
stdx/lexer.d (83 lines changed)
@@ -2,7 +2,7 @@

 /**
 * $(H2 Summary)
- * This module contains a range-based _lexer generator.
+ * This module contains a range-based compile-time _lexer generator.
 *
 * $(H2 Overview)
 * The _lexer generator consists of a template mixin, $(LREF Lexer), along with

@@ -12,7 +12,10 @@
 * $(OL
 * $(LI Create the string array costants for your language.
 * $(UL
- * $(LI $(LINK2 #.StringConstants, String Constants))
+ * $(LI $(LINK2 #.staticTokens, staticTokens))
+ * $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
+ * $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
+ * $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
 * ))
 * $(LI Create aliases for the various token and token identifier types
 * specific to your language.
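The list in the hunk above names the four string array constants a lexer author has to define. A rough sketch for a toy language (the values and handler names below are illustrative; only the constant names and the even-index/odd-index layout of tokenHandlers come from the documentation itself):

---
// Illustrative values only; the names staticTokens, dynamicTokens,
// possibleDefaultTokens and tokenHandlers are the documented constants.
enum string[] staticTokens = ["+", "-", "*", "/", "(", ")"];
enum string[] dynamicTokens = ["numberLiteral", "whitespace", "identifier"];
enum string[] possibleDefaultTokens = ["for", "if"];
// Even-index elements are prefixes; the element immediately after each one
// names the member function that lexes tokens starting with that prefix.
enum string[] tokenHandlers = [
    "0", "lexNumber",
    "1", "lexNumber",
    " ", "lexWhitespace",
];
---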
@@ -33,25 +36,33 @@
 * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
 * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
 * )
- * $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
+ * $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
 * $(DL
- * $(DT $(B staticTokens))
+ * $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction)
+ * $(DD A function that serves as the default token lexing function. For most
+ * languages this will be the identifier lexing function.))
+ * $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
+ * $(DD A function that is able to determine if an identifier/keyword has come
+ * to an end. This function must return bool and take a single size_t
+ * argument representing the number of bytes to skip over before looking for
+ * a separating character.)
+ * $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
 * $(DD A listing of the tokens whose exact value never changes and which cannot
 * possibly be a token handled by the default token lexing function. The
 * most common example of this kind of token is an operator such as
 * $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
- * $(DT $(B dynamicTokens))
+ * $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
 * $(DD A listing of tokens whose value is variable, such as whitespace,
 * identifiers, number literals, and string literals.)
- * $(DT $(B possibleDefaultTokens))
+ * $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
 * $(DD A listing of tokens that could posibly be one of the tokens handled by
 * the default token handling function. An common example of this is
 * a keyword such as $(D_STRING "for"), which looks like the beginning of
- * the identifier $(D_STRING "fortunate"). isSeparating is called to
- * determine if the character after the $(D_STRING 'r') separates the
- * identifier, indicating that the token is $(D_STRING "for"), or if lexing
- * should be turned over to the defaultTokenFunction.)
- * $(DT $(B tokenHandlers))
+ * the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
+ * called to determine if the character after the $(D_STRING 'r') separates
+ * the identifier, indicating that the token is $(D_STRING "for"), or if
+ * lexing should be turned over to the $(B defaultTokenFunction).)
+ * $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
 * $(DD A mapping of prefixes to custom token handling function names. The
 * generated _lexer will search for the even-index elements of this array,
 * and then call the function whose name is the element immedately after the
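The tokenSeparatingFunction entry above says the function returns bool and takes a single size_t: the number of bytes to skip ahead before checking for a separating character. A minimal sketch of that check follows, written as a free function so it compiles on its own; in a real lexer it would be a member that takes only the offset and reads the mixin's own range, so the bytes/index parameters here are purely for illustration, as is the choice of identifier characters:

---
// Sketch only: a member version would take just the size_t offset and read
// the lexer's own range; bytes/index are passed here to keep it standalone.
bool isSeparating(const(ubyte)[] bytes, size_t index, size_t offset) pure nothrow @safe
{
    if (index + offset >= bytes.length)
        return true;                        // end of input ends the identifier
    immutable c = bytes[index + offset];
    if (c >= 'a' && c <= 'z') return false;
    if (c >= 'A' && c <= 'Z') return false;
    if (c >= '0' && c <= '9') return false;
    if (c == '_') return false;
    return true;                            // e.g. after "for" in "for(", '(' separates
}
---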
@@ -155,7 +166,7 @@ unittest
 {
 /// Fix https://github.com/Hackerpilot/Dscanner/issues/96
 alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
- alias tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
+ enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
 alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);

 static assert(str(tok!"foo") == "foo");

@@ -167,8 +178,8 @@ unittest
 * Generates the token type identifier for the given symbol. There are two
 * special cases:
 * $(UL
- * $(LI If symbol is "", then the token identifier will be 0)
- * $(LI If symbol is "\0", then the token identifier will be the maximum
+ * $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
+ * $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the maximum
 * valid token type identifier)
 * )
 * In all cases this template will alias itself to a constant of type IdType.
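The two special cases above can be checked at compile time. A small sketch reusing the arrays from the module's own unittest; the import assumes the module is named stdx.lexer, matching its file path:

---
// Assumes the module is importable as stdx.lexer (matching its file path).
import stdx.lexer : TokenIdType, TokenId;

alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);

// Special case 1: the empty symbol always maps to token id 0.
static assert(tok!"" == 0);
// Special case 2: "\0" maps to the maximum valid token id; its exact value
// depends on how many tokens were declared, so it is not asserted here.
enum maxId = tok!"\0";
---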
@@ -320,12 +331,20 @@ public:
 * $(LI A constructor that initializes the range field as well as calls
 * popFront() exactly once (to initialize the _front field).)
 * )
+ * Params:
+ * Token = $(LREF TokenStructure)
+ * defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
+ * tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
+ * staticTokens = $(LINK2 #.staticTokens, staticTokens)
+ * dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
+ * possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
+ * tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
 * Examples:
 * ---
 * struct CalculatorLexer
 * {
- * mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
- * staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
+ * mixin Lexer!(Token, defaultTokenFunction, isSeparating,
+ * tokenHandlers, staticTokens, dynamicTokens, possibleDefaultTokens);
 *
 * this (ubyte[] bytes)
 * {

@@ -340,12 +359,12 @@ public:
 *
 * Token lexNumber() pure nothrow @safe
 * {
- * ...
+ * // implementation goes here
 * }
 *
 * Token lexWhitespace() pure nothrow @safe
 * {
- * ...
+ * // implementation goes here
 * }
 *
 * Token defaultTokenFunction() pure nothrow @safe

@@ -365,8 +384,8 @@ public:
 * ---
 */
 mixin template Lexer(Token, alias defaultTokenFunction,
- alias tokenSeparatingFunction, alias tokenHandlers,
- alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)
+ alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
+ alias possibleDefaultTokens, alias tokenHandlers)
 {
 private alias _IDType = typeof(Token.type);
 private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);

@@ -385,13 +404,13 @@ mixin template Lexer(Token, alias defaultTokenFunction,
 return format("0x%016x", u);
 }

- static string generateByteMask(size_t l)
+ private static string generateByteMask(size_t l)
 {
 import std.string;
 return format("0x%016x", ulong.max >> ((8 - l) * 8));
 }

- static string generateCaseStatements()
+ private static string generateCaseStatements()
 {
 import std.conv;
 import std.string;

@@ -412,7 +431,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
 return code;
 }

- static string printCase(string[] tokens, string[] pseudoTokens)
+ private static string printCase(string[] tokens, string[] pseudoTokens)
 {
 string[] t = tokens;
 string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));

@@ -509,7 +528,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
 }

 /**
- * Implements the range primitive front().
+ * Implements the range primitive _front.
 */
 ref const(Token) front() pure nothrow const @property
 {

@@ -523,7 +542,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
 }

 /**
- * Implements the range primitive empty().
+ * Implements the range primitive _empty.
 */
 bool empty() pure const nothrow @property
 {
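Because the mixin supplies front and empty (and popFront, mentioned earlier in this documentation), a generated lexer is consumed like any input range. A sketch built around the DLexer constructor from the first hunk of this commit; the module paths are assumptions based on the repository layout linked above:

---
import std.stdio : writeln;
import stdx.lexer;    // StringCache, LexerRange, the Lexer mixin (assumed module name)
import stdx.d.lexer;  // DLexer and LexerConfig (assumed location)

void dumpTokenTypes(ubyte[] source, const LexerConfig config, StringCache* cache)
{
    // Constructor signature taken from the first hunk of this commit.
    auto lexer = DLexer(source, config, cache);
    while (!lexer.empty)              // empty() provided by the Lexer mixin
    {
        writeln(lexer.front.type);    // front() provided by the mixin; .type as in typeof(Token.type)
        lexer.popFront();
    }
}
---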
@@ -606,8 +625,8 @@ struct LexerRange
 * Params:
 * bytes = the _lexer input
 * index = the initial offset from the beginning of $(D_PARAM bytes)
- * column = the initial column number
- * line = the initial line number
+ * column = the initial _column number
+ * line = the initial _line number
 */
 this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
 {

@@ -626,7 +645,7 @@ struct LexerRange
 }

 /**
- * Sets the range to the given position
+ * Sets the range to the given position.
 * Params: m = the position to seek to
 */
 void seek(size_t m) nothrow pure @safe

@@ -635,7 +654,7 @@ struct LexerRange
 }

 /**
- * Returs a slice of the input byte array betwene the given mark and the
+ * Returs a slice of the input byte array between the given mark and the
 * current position.
 * Params m = the beginning index of the slice to return
 */
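Taking the three LexerRange hunks above together, the intended usage pattern appears to be: remember an index, advance, then slice between that mark and the current position, or seek back to it. Only the constructor, seek, and slice appear in this diff; popFront and a readable index field are assumptions in the sketch below:

---
unittest
{
    import stdx.lexer : LexerRange;

    // Constructor defaults documented above: index 0, column 1, line 1.
    auto r = LexerRange(cast(const(ubyte)[]) "for fortunate");

    immutable size_t mark = r.index;   // assumed public field, matching the ctor parameter
    foreach (_; 0 .. 3)
        r.popFront();                  // assumed range primitive; advances one byte
    assert(r.slice(mark) == cast(const(ubyte)[]) "for");

    r.seek(mark);                      // rewind to the recorded position
    assert(r.index == mark);
}
---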
@@ -782,7 +801,7 @@ public:

 /**
 * Caches a string.
- * Params: bytes = the string to cache
+ * Params: bytes = the string to _cache
 * Returns: A key that can be used to retrieve the cached string
 * Examples:
 * ---

@@ -800,8 +819,8 @@ public:
 }

 /**
- * Caches a string as above, but uses the given has code instead of
- * calculating one itself. Use this alongside hashStep() can reduce the
+ * Caches a string as above, but uses the given hash code instead of
+ * calculating one itself. Use this alongside $(LREF hashStep)() can reduce the
 * amount of work necessary when lexing dynamic tokens.
 */
 size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
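These last two hunks document a pair of cache overloads: one that hashes the bytes itself, and the two-argument form shown here that accepts a hash the caller already computed (for example accumulated with hashStep while lexing). hashStep's signature is not part of this diff, so the hypothetical helper below simply passes a precomputed hash through:

---
import stdx.lexer : StringCache;

// Hypothetical helper. The one-argument overload is the "Caches a string"
// function above; the two-argument overload matches the signature in the
// last hunk. How precomputedHash is obtained (e.g. via hashStep during
// lexing) is outside this diff.
size_t intern(StringCache* cache, const(ubyte)[] bytes, uint precomputedHash)
{
    size_t key = cache.cache(bytes, precomputedHash);
    // Equivalent, but recomputes the hash internally:
    // size_t key = cache.cache(bytes);
    return key;
}
---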
|
|