From 2f78272fed68b797e62e272c3b9d26b1030adb45 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 23 Jan 2014 22:54:18 -0800 Subject: [PATCH] Simplified lexer interface --- stdx/d/gendoc.sh | 4 ---- stdx/d/lexer.d | 20 +++++++------------- stdx/lexer.d | 34 +++++++++++++++++----------------- 3 files changed, 24 insertions(+), 34 deletions(-) delete mode 100644 stdx/d/gendoc.sh diff --git a/stdx/d/gendoc.sh b/stdx/d/gendoc.sh deleted file mode 100644 index d500788..0000000 --- a/stdx/d/gendoc.sh +++ /dev/null @@ -1,4 +0,0 @@ -dmd -c -D lexer.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/lexer.html -I../.. -dmd -c -D ast.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/ast.html -I../.. -dmd -c -D parser.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/parser.html -I../.. - diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index 35e3580..05071e0 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -8,7 +8,7 @@ import std.range; import stdx.lexer; public import stdx.lexer : StringCache; -private enum staticTokens = [ +private enum operators = [ ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", @@ -16,13 +16,7 @@ private enum staticTokens = [ "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~=" ]; -private enum pseudoTokens = [ - "\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6", - "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", - "#line", "\u2028", "\u2029" -]; - -private enum possibleDefaultTokens = [ +private enum keywords = [ "abstract", "alias", "align", "asm", "assert", "auto", "body", "bool", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal", "dchar", "debug", "default", @@ -82,11 +76,11 @@ private enum pseudoTokenHandlers = [ "#line", "lexSpecialTokenSequence" ]; -public alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens); -public alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens); +public alias IdType = TokenIdType!(operators, dynamicTokens, keywords); +public alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords); public template tok(string token) { - alias tok = TokenId!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens, token); + alias tok = TokenId!(IdType, operators, dynamicTokens, keywords, token); } private enum extraFields = q{ string comment; @@ -405,8 +399,8 @@ public struct DLexer { import core.vararg; - mixin Lexer!(IdType, Token, lexIdentifier, staticTokens, - dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens); + mixin Lexer!(IdType, Token, lexIdentifier, isSeparating, operators, + dynamicTokens, pseudoTokenHandlers, keywords); this(ubyte[] range, const LexerConfig config, StringCache* cache) { diff --git a/stdx/lexer.d b/stdx/lexer.d index c15c8c8..850253e 100644 --- a/stdx/lexer.d +++ b/stdx/lexer.d @@ -11,13 +11,6 @@ module stdx.lexer; -import std.typecons; -import std.algorithm; -import std.range; -import std.traits; -import std.conv; -import std.math; - /** * Template for determining the type used for a token type. Selects the smallest * unsigned integral type that is able to hold the value @@ -81,6 +74,7 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens template TokenId(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens, string symbol) { + import std.algorithm; static if (symbol == "") { enum id = 0; @@ -190,10 +184,13 @@ public: } mixin template Lexer(IDType, Token, alias defaultTokenFunction, - alias staticTokens, alias dynamicTokens, alias pseudoTokens, + alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, alias pseudoTokenHandlers, alias possibleDefaultTokens) { + static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must" + ~ " have a corresponding handler function name."); + static string generateMask(const ubyte[] arr) { import std.string; @@ -211,26 +208,28 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, return format("0x%016x", ulong.max >> ((8 - l) * 8)); } - static string generateCaseStatements(string[] tokens) + static string generateCaseStatements() { import std.conv; import std.string; + import std.range; - + string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2)); + string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq); string code; - for (size_t i = 0; i < tokens.length; i++) + for (size_t i = 0; i < allTokens.length; i++) { size_t j = i + 1; size_t o = i; - while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++; - code ~= format("case 0x%02x:\n", cast(ubyte) tokens[i][0]); - code ~= printCase(tokens[i .. j]); + while (j < allTokens.length && allTokens[i][0] == allTokens[j][0]) j++; + code ~= format("case 0x%02x:\n", cast(ubyte) allTokens[i][0]); + code ~= printCase(allTokens[i .. j], pseudoTokens); i = j - 1; } return code; } - static string printCase(string[] tokens) + static string printCase(string[] tokens, string[] pseudoTokens) { string[] t = tokens; string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t)); @@ -300,7 +299,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, // possible default if (token.length <= 8) { - code ~= " if (isSeparating(" ~ text(token.length) ~ "))\n"; + code ~= " if (tokenSeparatingFunction(" ~ text(token.length) ~ "))\n"; code ~= " {\n"; code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n"; code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n"; @@ -371,7 +370,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction, return retVal; } - enum tokenSearch = generateCaseStatements(stupidToArray(uniq(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)))); + enum tokenSearch = generateCaseStatements(); static ulong getFront(const ubyte[] arr) pure nothrow @trusted { @@ -625,6 +624,7 @@ private: const(Item)* find(const(ubyte)[] bytes, uint hash) pure nothrow const @safe { + import std.algorithm; immutable size_t index = hash % buckets.length; for (const(Item)* item = buckets[index]; item !is null; item = item.next) {