From 2eba33c1d3a30a0f34d50b4447f0fc797a7cbc57 Mon Sep 17 00:00:00 2001
From: Hackerpilot
Date: Thu, 20 Feb 2014 01:04:48 -0800
Subject: [PATCH 1/3] Updated DDoc, made template order more consistent.

---
 stdx/d/lexer.d |  4 +--
 stdx/lexer.d   | 83 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d
index 2b355e0..d382146 100644
--- a/stdx/d/lexer.d
+++ b/stdx/d/lexer.d
@@ -399,8 +399,8 @@ public struct DLexer
 {
     import core.vararg;

-    mixin Lexer!(Token, lexIdentifier, isSeparating, pseudoTokenHandlers,
-        operators, dynamicTokens, keywords);
+    mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
+        keywords, pseudoTokenHandlers);

     this(ubyte[] range, const LexerConfig config, StringCache* cache)
     {
diff --git a/stdx/lexer.d b/stdx/lexer.d
index 81faef6..b9e9f4b 100644
--- a/stdx/lexer.d
+++ b/stdx/lexer.d
@@ -2,7 +2,7 @@

 /**
  * $(H2 Summary)
- * This module contains a range-based _lexer generator.
+ * This module contains a range-based compile-time _lexer generator.
  *
  * $(H2 Overview)
  * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
@@ -12,7 +12,10 @@
  * $(OL
  *     $(LI Create the string array costants for your language.
  *         $(UL
- *             $(LI $(LINK2 #.StringConstants, String Constants))
+ *             $(LI $(LINK2 #.staticTokens, staticTokens))
+ *             $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
+ *             $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
+ *             $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
  *         ))
  *     $(LI Create aliases for the various token and token identifier types
  *         specific to your language.
@@ -33,25 +36,33 @@
  * $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/stdx/d/lexer.d, here).)
  * $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
  * )
- * $(DDOC_ANCHOR StringConstants) $(H2 String Constants)
+ * $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
  * $(DL
- * $(DT $(B staticTokens))
+ * $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction))
+ * $(DD A function that serves as the default token lexing function. For most
+ *     languages this will be the identifier lexing function.)
+ * $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
+ * $(DD A function that is able to determine if an identifier/keyword has come
+ *     to an end. This function must return bool and take a single size_t
+ *     argument representing the number of bytes to skip over before looking for
+ *     a separating character.)
+ * $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
  * $(DD A listing of the tokens whose exact value never changes and which cannot
  *     possibly be a token handled by the default token lexing function. The
  *     most common example of this kind of token is an operator such as
  *     $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
- * $(DT $(B dynamicTokens))
+ * $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
  * $(DD A listing of tokens whose value is variable, such as whitespace,
  *     identifiers, number literals, and string literals.)
- * $(DT $(B possibleDefaultTokens))
+ * $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
  * $(DD A listing of tokens that could posibly be one of the tokens handled by
  *     the default token handling function. An common example of this is
  *     a keyword such as $(D_STRING "for"), which looks like the beginning of
- *     the identifier $(D_STRING "fortunate"). isSeparating is called to
- *     determine if the character after the $(D_STRING 'r') separates the
- *     identifier, indicating that the token is $(D_STRING "for"), or if lexing
- *     should be turned over to the defaultTokenFunction.)
- * $(DT $(B tokenHandlers))
+ *     the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
+ *     called to determine if the character after the $(D_STRING 'r') separates
+ *     the identifier, indicating that the token is $(D_STRING "for"), or if
+ *     lexing should be turned over to the $(B defaultTokenFunction).)
+ * $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
  * $(DD A mapping of prefixes to custom token handling function names. The
  *     generated _lexer will search for the even-index elements of this array,
  *     and then call the function whose name is the element immedately after the
@@ -155,7 +166,7 @@ unittest
 {
     /// Fix https://github.com/Hackerpilot/Dscanner/issues/96
     alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
-    alias tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
+    enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
     alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);

     static assert(str(tok!"foo") == "foo");
@@ -167,8 +178,8 @@
  * Generates the token type identifier for the given symbol. There are two
  * special cases:
  * $(UL
- *     $(LI If symbol is "", then the token identifier will be 0)
- *     $(LI If symbol is "\0", then the token identifier will be the maximum
+ *     $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
+ *     $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the maximum
  *     valid token type identifier)
  * )
  * In all cases this template will alias itself to a constant of type IdType.
@@ -320,12 +331,20 @@ public:
  *     $(LI A constructor that initializes the range field as well as calls
  *         popFront() exactly once (to initialize the _front field).)
  * )
+ * Params:
+ *     Token = $(LREF TokenStructure)
+ *     defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
+ *     tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
+ *     staticTokens = $(LINK2 #.staticTokens, staticTokens)
+ *     dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
+ *     possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
+ *     tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
  * Examples:
  * ---
  * struct CalculatorLexer
  * {
- *     mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
- *         staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);
+ *     mixin Lexer!(Token, defaultTokenFunction, isSeparating,
+ *         staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
  *
  *     this (ubyte[] bytes)
  *     {
@@ -340,12 +359,12 @@ public:
  *
  *     Token lexNumber() pure nothrow @safe
 *     {
- *         ...
+ *         // implementation goes here
 *     }
 *
 *     Token lexWhitespace() pure nothrow @safe
 *     {
- *         ...
+ *         // implementation goes here
 *     }
 *
 *     Token defaultTokenFunction() pure nothrow @safe
@@ -365,8 +384,8 @@
 * ---
 */
 mixin template Lexer(Token, alias defaultTokenFunction,
-    alias tokenSeparatingFunction, alias tokenHandlers,
-    alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)
+    alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
+    alias possibleDefaultTokens, alias tokenHandlers)
 {
     private alias _IDType = typeof(Token.type);
     private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);
@@ -385,13 +404,13 @@
         return format("0x%016x", u);
     }

-    static string generateByteMask(size_t l)
+    private static string generateByteMask(size_t l)
     {
         import std.string;
         return format("0x%016x", ulong.max >> ((8 - l) * 8));
     }

-    static string generateCaseStatements()
+    private static string generateCaseStatements()
     {
         import std.conv;
         import std.string;
@@ -412,7 +431,7 @@
         return code;
     }

-    static string printCase(string[] tokens, string[] pseudoTokens)
+    private static string printCase(string[] tokens, string[] pseudoTokens)
     {
         string[] t = tokens;
         string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
@@ -509,7 +528,7 @@
     }

     /**
-     * Implements the range primitive front().
+     * Implements the range primitive _front.
     */
     ref const(Token) front() pure nothrow const @property
     {
@@ -523,7 +542,7 @@
     }

     /**
-     * Implements the range primitive empty().
+     * Implements the range primitive _empty.
     */
     bool empty() pure const nothrow @property
     {
@@ -606,8 +625,8 @@ struct LexerRange
     * Params:
     *     bytes = the _lexer input
     *     index = the initial offset from the beginning of $(D_PARAM bytes)
-     *     column = the initial column number
-     *     line = the initial line number
+     *     column = the initial _column number
+     *     line = the initial _line number
     */
     this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
     {
@@ -626,7 +645,7 @@
     }

     /**
-     * Sets the range to the given position
+     * Sets the range to the given position.
     * Params: m = the position to seek to
     */
     void seek(size_t m) nothrow pure @safe
@@ -635,7 +654,7 @@
     }

     /**
-     * Returs a slice of the input byte array betwene the given mark and the
+     * Returns a slice of the input byte array between the given mark and the
     * current position.
     * Params m = the beginning index of the slice to return
     */
@@ -782,7 +801,7 @@ public:

     /**
     * Caches a string.
-     * Params: bytes = the string to cache
+     * Params: bytes = the string to _cache
     * Returns: A key that can be used to retrieve the cached string
     * Examples:
     * ---
@@ -800,8 +819,8 @@
     }

     /**
-     * Caches a string as above, but uses the given has code instead of
-     * calculating one itself. Use this alongside hashStep() can reduce the
+     * Caches a string as above, but uses the given hash code instead of
+     * calculating one itself. Using this alongside $(LREF hashStep)() can reduce the
     * amount of work necessary when lexing dynamic tokens.
     */
     size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe

From 65bed05fa72f8831a95007af4c499358a5186fae Mon Sep 17 00:00:00 2001
From: Callum Anderson
Date: Fri, 21 Feb 2014 16:02:04 +1100
Subject: [PATCH 2/3] Case range statement - low node not set

---
 stdx/d/parser.d | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/stdx/d/parser.d b/stdx/d/parser.d
index 76a1ca2..bfcadc9 100644
--- a/stdx/d/parser.d
+++ b/stdx/d/parser.d
@@ -973,6 +973,8 @@ alias core.sys.posix.stdio.fileno fileno;
         expect(tok!"case");
         node.low = parseAssignExpression();
     }
+    else
+        node.low = low;
     if (expect(tok!":") is null) return null;
     if (expect(tok!"..") is null) return null;
     expect(tok!"case");

From 4c05a096630cc751970cc371725001e96b22be94 Mon Sep 17 00:00:00 2001
From: Callum Anderson
Date: Fri, 21 Feb 2014 16:50:03 +1100
Subject: [PATCH 3/3] debug/version = blah is treated as a statement

---
 stdx/d/parser.d | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdx/d/parser.d b/stdx/d/parser.d
index 76a1ca2..b28994d 100644
--- a/stdx/d/parser.d
+++ b/stdx/d/parser.d
@@ -6097,7 +6097,7 @@ protected:
         return !peekIs(tok!"switch");
     case tok!"debug":
     case tok!"version":
-        return peekIs(tok!"=");
+        return !peekIs(tok!"=");
     case tok!"synchronized":
         if (peekIs(tok!"("))
             return false;
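
A minimal, illustrative sketch of the setup that patch 1/3 documents, shown here
for context; it is not part of the patches above. It assumes the stdx.lexer
module from this repository is importable, and the token lists, keyword names,
and handler names (lexNumber, lexWhitespace) are invented for a toy calculator
language. The templates used (TokenIdType, TokenId, tokenStringRepresentation,
TokenStructure) and the argument order of the Lexer mixin are the ones shown in
the patched stdx/lexer.d; the empty second argument to TokenStructure (extra
fields) is an assumption.

---
// Illustrative sketch only -- not part of the patch series above.
import stdx.lexer;

// Step 1: string array constants describing the toy language.
private enum string[] staticTokens = ["+", "-", "*", "/", "(", ")"];
private enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
private enum string[] possibleDefaultTokens = ["sin", "cos"]; // hypothetical keywords
private enum string[] tokenHandlers = [
    // even indexes are prefixes, odd indexes name the handler function to call
    "0", "lexNumber", "1", "lexNumber", "2", "lexNumber", "3", "lexNumber",
    "4", "lexNumber", "5", "lexNumber", "6", "lexNumber", "7", "lexNumber",
    "8", "lexNumber", "9", "lexNumber", " ", "lexWhitespace",
];

// Step 2: aliases built from the templates documented in stdx/lexer.d.
private alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
private alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens,
    possibleDefaultTokens);
private enum tok(string symbol) = TokenId!(IdType, staticTokens, dynamicTokens,
    possibleDefaultTokens, symbol);
private alias Token = TokenStructure!(IdType, ""); // "" = no extra fields (assumed optional)

// Mirrors the unittest in stdx/lexer.d: a static token maps back to its text.
static assert(str(tok!"+") == "+");

// Step 3 (see the CalculatorLexer example in the patched stdx/lexer.d): a lexer
// struct would then mix in the generator with the post-patch argument order
//     mixin Lexer!(Token, defaultTokenFunction, isSeparating,
//         staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
// and implement lexNumber, lexWhitespace, defaultTokenFunction and isSeparating.
---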