Updated DDoc, made template order more consistent.
This commit is contained in:
parent c56716e096
commit 2eba33c1d3
@@ -399,8 +399,8 @@ public struct DLexer
 {
     import core.vararg;
 
-    mixin Lexer!(Token, lexIdentifier, isSeparating, pseudoTokenHandlers,
-        operators, dynamicTokens, keywords);
+    mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
+        keywords, pseudoTokenHandlers);
 
     this(ubyte[] range, const LexerConfig config, StringCache* cache)
     {
stdx/lexer.d (83 changed lines)
@@ -2,7 +2,7 @@
 
 /**
  * $(H2 Summary)
- * This module contains a range-based _lexer generator.
+ * This module contains a range-based compile-time _lexer generator.
  *
  * $(H2 Overview)
  * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
@@ -12,7 +12,10 @@
  * $(OL
  * $(LI Create the string array costants for your language.
  *     $(UL
- *         $(LI $(LINK2 #.StringConstants, String Constants))
+ *         $(LI $(LINK2 #.staticTokens, staticTokens))
+ *         $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
+ *         $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
+ *         $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
  *     ))
  * $(LI Create aliases for the various token and token identifier types
  *     specific to your language.
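As a quick aside, the alias declarations that the second step of this list refers to follow the shape of the unittest further down in this patch. A minimal sketch with toy token arrays (all names here are illustrative, and TokenIdType is assumed to take the arrays in the same order as TokenId does):

---
// Toy stand-ins for a real language's token arrays.
enum string[] operators = ["+", "-"];        // staticTokens
enum string[] dynamic   = ["numberLiteral"]; // dynamicTokens
enum string[] keywords  = ["if"];            // possibleDefaultTokens

// A token identifier type wide enough for every token above.
alias IdType = TokenIdType!(operators, dynamic, keywords);

// Compile-time lookup of a token identifier from its string form.
enum tok(string symbol) = TokenId!(IdType, operators, dynamic, keywords, symbol);

// Maps an identifier back to its string representation.
alias str = tokenStringRepresentation!(IdType, operators, dynamic, keywords);

static assert(str(tok!"+") == "+");
---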
@@ -33,25 +36,33 @@
  * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
  * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
  * )
- * $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
+ * $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
  * $(DL
- * $(DT $(B staticTokens))
+ * $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction)
+ * $(DD A function that serves as the default token lexing function. For most
+ *     languages this will be the identifier lexing function.))
+ * $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
+ * $(DD A function that is able to determine if an identifier/keyword has come
+ *     to an end. This function must return bool and take a single size_t
+ *     argument representing the number of bytes to skip over before looking for
+ *     a separating character.)
+ * $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
  * $(DD A listing of the tokens whose exact value never changes and which cannot
  *     possibly be a token handled by the default token lexing function. The
  *     most common example of this kind of token is an operator such as
  *     $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
- * $(DT $(B dynamicTokens))
+ * $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
  * $(DD A listing of tokens whose value is variable, such as whitespace,
  *     identifiers, number literals, and string literals.)
- * $(DT $(B possibleDefaultTokens))
+ * $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
  * $(DD A listing of tokens that could posibly be one of the tokens handled by
  *     the default token handling function. An common example of this is
  *     a keyword such as $(D_STRING "for"), which looks like the beginning of
- *     the identifier $(D_STRING "fortunate"). isSeparating is called to
- *     determine if the character after the $(D_STRING 'r') separates the
- *     identifier, indicating that the token is $(D_STRING "for"), or if lexing
- *     should be turned over to the defaultTokenFunction.)
- * $(DT $(B tokenHandlers))
+ *     the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
+ *     called to determine if the character after the $(D_STRING 'r') separates
+ *     the identifier, indicating that the token is $(D_STRING "for"), or if
+ *     lexing should be turned over to the $(B defaultTokenFunction).)
+ * $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
  * $(DD A mapping of prefixes to custom token handling function names. The
  *     generated _lexer will search for the even-index elements of this array,
  *     and then call the function whose name is the element immedately after the
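For orientation only, the parameters defined above might be filled in as follows for a hypothetical calculator language; every name and value below is illustrative rather than taken from the commit:

---
// staticTokens: spellings that never change and can never start a default token.
enum string[] staticTokens = ["+", "-", "*", "/", "(", ")"];

// dynamicTokens: tokens whose text varies, lexed by custom handlers.
enum string[] dynamicTokens = ["numberLiteral", "whitespace"];

// possibleDefaultTokens: fixed spellings that could also begin a longer
// identifier, e.g. "sin" as the start of "sinister".
enum string[] possibleDefaultTokens = ["sin", "cos"];

// tokenHandlers: prefix/function-name pairs; the generated lexer matches the
// even-index prefixes and calls the function named immediately after each one.
enum string[] tokenHandlers = [
    "0", "lexNumber",
    "1", "lexNumber", // remaining digit prefixes omitted for brevity
    " ", "lexWhitespace",
];

// tokenSeparatingFunction: takes the number of bytes to skip before looking
// for a separating character and returns bool, as described above.
bool isSeparating(size_t offset) pure nothrow @safe
{
    return true; // placeholder; a real lexer inspects the byte at that offset
}

// defaultTokenFunction: lexes whatever is left over, typically identifiers.
// Token defaultTokenFunction() pure nothrow @safe { ... }
---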
@@ -155,7 +166,7 @@ unittest
 {
     /// Fix https://github.com/Hackerpilot/Dscanner/issues/96
     alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
-    alias tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
+    enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
     alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);
 
     static assert(str(tok!"foo") == "foo");
@@ -167,8 +178,8 @@ unittest
  * Generates the token type identifier for the given symbol. There are two
  * special cases:
  * $(UL
- *     $(LI If symbol is "", then the token identifier will be 0)
- *     $(LI If symbol is "\0", then the token identifier will be the maximum
+ *     $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
+ *     $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the maximum
  *     valid token type identifier)
  * )
  * In all cases this template will alias itself to a constant of type IdType.
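The two special cases can be illustrated with the same toy arrays the unittest above uses (this snippet is an aside, not part of the commit):

---
alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
enum tok(string symbol) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], symbol);

static assert(tok!"" == 0);           // "" always maps to identifier 0
static assert(tok!"foo" <= tok!"\0"); // "\0" is the largest valid identifier
---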
@@ -320,12 +331,20 @@ public:
  * $(LI A constructor that initializes the range field as well as calls
  *     popFront() exactly once (to initialize the _front field).)
  * )
+ * Params:
+ *     Token = $(LREF TokenStructure)
+ *     defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
+ *     tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
+ *     staticTokens = $(LINK2 #.staticTokens, staticTokens)
+ *     dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
+ *     possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
+ *     tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
  * Examples:
  * ---
  * struct CalculatorLexer
  * {
- *     mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
- *         staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
+ *     mixin Lexer!(Token, defaultTokenFunction, isSeparating,
+ *         tokenHandlers, staticTokens, dynamicTokens, possibleDefaultTokens);
  *
  *     this (ubyte[] bytes)
  *     {
@@ -340,12 +359,12 @@ public:
  *
  *     Token lexNumber() pure nothrow @safe
  *     {
- *         ...
+ *         // implementation goes here
  *     }
  *
  *     Token lexWhitespace() pure nothrow @safe
  *     {
- *         ...
+ *         // implementation goes here
  *     }
  *
  *     Token defaultTokenFunction() pure nothrow @safe
@@ -365,8 +384,8 @@ public:
  * ---
  */
 mixin template Lexer(Token, alias defaultTokenFunction,
-    alias tokenSeparatingFunction, alias tokenHandlers,
-    alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)
+    alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
+    alias possibleDefaultTokens, alias tokenHandlers)
 {
     private alias _IDType = typeof(Token.type);
     private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);
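Note that this reordered signature is what the DLexer hunk at the top of this commit follows; lining its arguments up against the parameters above (repeated here purely as a cross-reference):

---
//           Token, defaultTokenFunction, tokenSeparatingFunction,
//           staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers
mixin Lexer!(Token, lexIdentifier, isSeparating,
    operators, dynamicTokens, keywords, pseudoTokenHandlers);
---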
@@ -385,13 +404,13 @@ mixin template Lexer(Token, alias defaultTokenFunction,
         return format("0x%016x", u);
     }
 
-    static string generateByteMask(size_t l)
+    private static string generateByteMask(size_t l)
     {
         import std.string;
         return format("0x%016x", ulong.max >> ((8 - l) * 8));
     }
 
-    static string generateCaseStatements()
+    private static string generateCaseStatements()
     {
         import std.conv;
         import std.string;
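As a side note (not part of the diff), the expression in generateByteMask yields a hexadecimal literal whose lowest l bytes are set; for instance:

---
unittest
{
    import std.string : format;
    // mask covering the lowest 3 bytes of a ulong
    assert(format("0x%016x", ulong.max >> ((8 - 3) * 8)) == "0x0000000000ffffff");
    // mask covering all 8 bytes
    assert(format("0x%016x", ulong.max >> ((8 - 8) * 8)) == "0xffffffffffffffff");
}
---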
@@ -412,7 +431,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
         return code;
     }
 
-    static string printCase(string[] tokens, string[] pseudoTokens)
+    private static string printCase(string[] tokens, string[] pseudoTokens)
    {
         string[] t = tokens;
         string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
@@ -509,7 +528,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
     }
 
     /**
-     * Implements the range primitive front().
+     * Implements the range primitive _front.
      */
     ref const(Token) front() pure nothrow const @property
     {
@@ -523,7 +542,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
     }
 
     /**
-     * Implements the range primitive empty().
+     * Implements the range primitive _empty.
      */
     bool empty() pure const nothrow @property
     {
@@ -606,8 +625,8 @@ struct LexerRange
     * Params:
     *     bytes = the _lexer input
     *     index = the initial offset from the beginning of $(D_PARAM bytes)
-    *     column = the initial column number
-    *     line = the initial line number
+    *     column = the initial _column number
+    *     line = the initial _line number
     */
    this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
    {
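A small usage sketch for the constructor documented above (not from the commit; it assumes the bytes, index, column and line members are publicly readable, which the escaped parameter names suggest):

---
auto source = cast(const(ubyte)[]) "int x;";
auto r = LexerRange(source);          // index = 0, column = 1, line = 1 by default
auto s = LexerRange(source, 4, 5, 1); // resume lexing at offset 4, column 5, line 1
---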
@@ -626,7 +645,7 @@ struct LexerRange
     }
 
     /**
-     * Sets the range to the given position
+     * Sets the range to the given position.
     * Params: m = the position to seek to
     */
    void seek(size_t m) nothrow pure @safe
@@ -635,7 +654,7 @@ struct LexerRange
     }
 
     /**
-     * Returs a slice of the input byte array betwene the given mark and the
+     * Returs a slice of the input byte array between the given mark and the
     * current position.
     * Params m = the beginning index of the slice to return
     */
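The declaration this comment documents sits below the cut, so the member name is not visible in the hunk; assuming it is the usual slice member, the mark-then-slice idiom looks roughly like this (both the name and the usage are assumptions):

---
size_t mark = range.index;     // remember where the token starts
// ... advance range over the token's bytes ...
auto text = range.slice(mark); // bytes between mark and the current position
---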
@@ -782,7 +801,7 @@ public:
 
     /**
     * Caches a string.
-    * Params: bytes = the string to cache
+    * Params: bytes = the string to _cache
     * Returns: A key that can be used to retrieve the cached string
     * Examples:
     * ---
@@ -800,8 +819,8 @@ public:
     }
 
     /**
-     * Caches a string as above, but uses the given has code instead of
-     * calculating one itself. Use this alongside hashStep() can reduce the
+     * Caches a string as above, but uses the given hash code instead of
+     * calculating one itself. Use this alongside $(LREF hashStep)() can reduce the
     * amount of work necessary when lexing dynamic tokens.
     */
    size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
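Finally, a hedged sketch of how the two cache overloads relate; neither the construction of the StringCache nor the exact hashStep signature appears in these hunks, so both are assumed here:

---
// stringCache and precomputedHash are assumed to already be in scope.
const(ubyte)[] bytes = cast(const(ubyte)[]) "identifier";

// Simple form: the cache hashes the bytes itself.
size_t key1 = stringCache.cache(bytes);

// Incremental form: pass a hash accumulated while consuming the token
// (e.g. via hashStep) so the cache does not rehash the bytes.
size_t key2 = stringCache.cache(bytes, precomputedHash);
---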