Updated DDoc, made template order more consistent.

This commit is contained in:
Hackerpilot 2014-02-20 01:04:48 -08:00
parent c56716e096
commit 2eba33c1d3
2 changed files with 53 additions and 34 deletions

View File

@ -399,8 +399,8 @@ public struct DLexer
{ {
import core.vararg; import core.vararg;
mixin Lexer!(Token, lexIdentifier, isSeparating, pseudoTokenHandlers, mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
operators, dynamicTokens, keywords); keywords, pseudoTokenHandlers);
this(ubyte[] range, const LexerConfig config, StringCache* cache) this(ubyte[] range, const LexerConfig config, StringCache* cache)
{ {

View File

@ -2,7 +2,7 @@
/** /**
* $(H2 Summary) * $(H2 Summary)
* This module contains a range-based _lexer generator. * This module contains a range-based compile-time _lexer generator.
* *
* $(H2 Overview) * $(H2 Overview)
* The _lexer generator consists of a template mixin, $(LREF Lexer), along with * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
@ -12,7 +12,10 @@
* $(OL * $(OL
* $(LI Create the string array constants for your language. * $(LI Create the string array constants for your language.
* $(UL * $(UL
* $(LI $(LINK2 #.StringConstants, String Constants)) * $(LI $(LINK2 #.staticTokens, staticTokens))
* $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
* $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
* $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
* )) * ))
* $(LI Create aliases for the various token and token identifier types * $(LI Create aliases for the various token and token identifier types
* specific to your language. * specific to your language.
@ -33,25 +36,33 @@
* $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).) * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
* $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).) * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
* ) * )
* $(DDOC_ANCHOR StringConstants) $(H2 String Constants) * $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
* $(DL * $(DL
* $(DT $(B staticTokens)) * $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction)
* $(DD A function that serves as the default token lexing function. For most
* languages this will be the identifier lexing function.))
* $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
* $(DD A function that is able to determine if an identifier/keyword has come
* to an end. This function must return bool and take a single size_t
* argument representing the number of bytes to skip over before looking for
* a separating character.)
* $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
* $(DD A listing of the tokens whose exact value never changes and which cannot * $(DD A listing of the tokens whose exact value never changes and which cannot
* possibly be a token handled by the default token lexing function. The * possibly be a token handled by the default token lexing function. The
* most common example of this kind of token is an operator such as * most common example of this kind of token is an operator such as
* $(D_STRING "*"), or $(D_STRING "-") in a programming language.) * $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
* $(DT $(B dynamicTokens)) * $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
* $(DD A listing of tokens whose value is variable, such as whitespace, * $(DD A listing of tokens whose value is variable, such as whitespace,
* identifiers, number literals, and string literals.) * identifiers, number literals, and string literals.)
* $(DT $(B possibleDefaultTokens)) * $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
* $(DD A listing of tokens that could possibly be one of the tokens handled by * $(DD A listing of tokens that could possibly be one of the tokens handled by
* the default token handling function. A common example of this is * the default token handling function. A common example of this is
* a keyword such as $(D_STRING "for"), which looks like the beginning of * a keyword such as $(D_STRING "for"), which looks like the beginning of
* the identifier $(D_STRING "fortunate"). isSeparating is called to * the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
* determine if the character after the $(D_STRING 'r') separates the * called to determine if the character after the $(D_STRING 'r') separates
* identifier, indicating that the token is $(D_STRING "for"), or if lexing * the identifier, indicating that the token is $(D_STRING "for"), or if
* should be turned over to the defaultTokenFunction.) * lexing should be turned over to the $(B defaultTokenFunction).)
* $(DT $(B tokenHandlers)) * $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
* $(DD A mapping of prefixes to custom token handling function names. The * $(DD A mapping of prefixes to custom token handling function names. The
* generated _lexer will search for the even-index elements of this array, * generated _lexer will search for the even-index elements of this array,
* and then call the function whose name is the element immediately after the * and then call the function whose name is the element immediately after the
@ -155,7 +166,7 @@ unittest
{ {
/// Fix https://github.com/Hackerpilot/Dscanner/issues/96 /// Fix https://github.com/Hackerpilot/Dscanner/issues/96
alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]); alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
alias tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token); enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]); alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);
static assert(str(tok!"foo") == "foo"); static assert(str(tok!"foo") == "foo");
@ -167,8 +178,8 @@ unittest
* Generates the token type identifier for the given symbol. There are two * Generates the token type identifier for the given symbol. There are two
* special cases: * special cases:
* $(UL * $(UL
* $(LI If symbol is "", then the token identifier will be 0) * $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
* $(LI If symbol is "\0", then the token identifier will be the maximum * $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the maximum
* valid token type identifier) * valid token type identifier)
* ) * )
* In all cases this template will alias itself to a constant of type IdType. * In all cases this template will alias itself to a constant of type IdType.
@ -320,12 +331,20 @@ public:
* $(LI A constructor that initializes the range field as well as calls * $(LI A constructor that initializes the range field as well as calls
* popFront() exactly once (to initialize the _front field).) * popFront() exactly once (to initialize the _front field).)
* ) * )
* Params:
* Token = $(LREF TokenStructure)
* defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
* tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
* staticTokens = $(LINK2 #.staticTokens, staticTokens)
* dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
* possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
* tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
* Examples: * Examples:
* --- * ---
* struct CalculatorLexer * struct CalculatorLexer
* { * {
* mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating, * mixin Lexer!(Token, defaultTokenFunction, isSeparating,
* staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens); * staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
* *
* this (ubyte[] bytes) * this (ubyte[] bytes)
* { * {
@ -340,12 +359,12 @@ public:
* *
* Token lexNumber() pure nothrow @safe * Token lexNumber() pure nothrow @safe
* { * {
* ... * // implementation goes here
* } * }
* *
* Token lexWhitespace() pure nothrow @safe * Token lexWhitespace() pure nothrow @safe
* { * {
* ... * // implementation goes here
* } * }
* *
* Token defaultTokenFunction() pure nothrow @safe * Token defaultTokenFunction() pure nothrow @safe
@ -365,8 +384,8 @@ public:
* --- * ---
*/ */
mixin template Lexer(Token, alias defaultTokenFunction, mixin template Lexer(Token, alias defaultTokenFunction,
alias tokenSeparatingFunction, alias tokenHandlers, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens) alias possibleDefaultTokens, alias tokenHandlers)
{ {
private alias _IDType = typeof(Token.type); private alias _IDType = typeof(Token.type);
private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol); private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);
@ -385,13 +404,13 @@ mixin template Lexer(Token, alias defaultTokenFunction,
return format("0x%016x", u); return format("0x%016x", u);
} }
static string generateByteMask(size_t l) private static string generateByteMask(size_t l)
{ {
import std.string; import std.string;
return format("0x%016x", ulong.max >> ((8 - l) * 8)); return format("0x%016x", ulong.max >> ((8 - l) * 8));
} }
static string generateCaseStatements() private static string generateCaseStatements()
{ {
import std.conv; import std.conv;
import std.string; import std.string;
@ -412,7 +431,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
return code; return code;
} }
static string printCase(string[] tokens, string[] pseudoTokens) private static string printCase(string[] tokens, string[] pseudoTokens)
{ {
string[] t = tokens; string[] t = tokens;
string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t)); string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
@ -509,7 +528,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
} }
/** /**
* Implements the range primitive front(). * Implements the range primitive _front.
*/ */
ref const(Token) front() pure nothrow const @property ref const(Token) front() pure nothrow const @property
{ {
@ -523,7 +542,7 @@ mixin template Lexer(Token, alias defaultTokenFunction,
} }
/** /**
* Implements the range primitive empty(). * Implements the range primitive _empty.
*/ */
bool empty() pure const nothrow @property bool empty() pure const nothrow @property
{ {
@ -606,8 +625,8 @@ struct LexerRange
* Params: * Params:
* bytes = the _lexer input * bytes = the _lexer input
* index = the initial offset from the beginning of $(D_PARAM bytes) * index = the initial offset from the beginning of $(D_PARAM bytes)
* column = the initial column number * column = the initial _column number
* line = the initial line number * line = the initial _line number
*/ */
this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
{ {
@ -626,7 +645,7 @@ struct LexerRange
} }
/** /**
* Sets the range to the given position * Sets the range to the given position.
* Params: m = the position to seek to * Params: m = the position to seek to
*/ */
void seek(size_t m) nothrow pure @safe void seek(size_t m) nothrow pure @safe
@ -635,7 +654,7 @@ struct LexerRange
} }
/** /**
* Returns a slice of the input byte array betwene the given mark and the * Returns a slice of the input byte array between the given mark and the
* current position. * current position.
* Params: m = the beginning index of the slice to return * Params: m = the beginning index of the slice to return
*/ */
@ -782,7 +801,7 @@ public:
/** /**
* Caches a string. * Caches a string.
* Params: bytes = the string to cache * Params: bytes = the string to _cache
* Returns: A key that can be used to retrieve the cached string * Returns: A key that can be used to retrieve the cached string
* Examples: * Examples:
* --- * ---
@ -800,8 +819,8 @@ public:
} }
/** /**
* Caches a string as above, but uses the given has code instead of * Caches a string as above, but uses the given hash code instead of
* calculating one itself. Using this alongside hashStep() can reduce the * calculating one itself. Using this alongside $(LREF hashStep)() can reduce the
* amount of work necessary when lexing dynamic tokens. * amount of work necessary when lexing dynamic tokens.
*/ */
size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe