Simplified lexer interface
This commit is contained in:
parent
76fc800d30
commit
2f78272fed
|
@ -1,4 +0,0 @@
|
||||||
dmd -c -D lexer.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/lexer.html -I../..
|
|
||||||
dmd -c -D ast.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/ast.html -I../..
|
|
||||||
dmd -c -D parser.d ../../../d-programming-language.org/std.ddoc -Df../../../hackerpilot.github.com/experimental/std_lexer/phobos/parser.html -I../..
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ import std.range;
|
||||||
import stdx.lexer;
|
import stdx.lexer;
|
||||||
public import stdx.lexer : StringCache;
|
public import stdx.lexer : StringCache;
|
||||||
|
|
||||||
private enum staticTokens = [
|
private enum operators = [
|
||||||
",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
|
",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=",
|
||||||
"!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++",
|
"!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++",
|
||||||
"+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=",
|
"+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=",
|
||||||
|
@ -16,13 +16,7 @@ private enum staticTokens = [
|
||||||
"^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
|
"^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
|
||||||
];
|
];
|
||||||
|
|
||||||
private enum pseudoTokens = [
|
private enum keywords = [
|
||||||
"\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6",
|
|
||||||
"7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!",
|
|
||||||
"#line", "\u2028", "\u2029"
|
|
||||||
];
|
|
||||||
|
|
||||||
private enum possibleDefaultTokens = [
|
|
||||||
"abstract", "alias", "align", "asm", "assert", "auto", "body", "bool",
|
"abstract", "alias", "align", "asm", "assert", "auto", "body", "bool",
|
||||||
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
|
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
|
||||||
"char", "class", "const", "continue", "creal", "dchar", "debug", "default",
|
"char", "class", "const", "continue", "creal", "dchar", "debug", "default",
|
||||||
|
@ -82,11 +76,11 @@ private enum pseudoTokenHandlers = [
|
||||||
"#line", "lexSpecialTokenSequence"
|
"#line", "lexSpecialTokenSequence"
|
||||||
];
|
];
|
||||||
|
|
||||||
public alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
|
public alias IdType = TokenIdType!(operators, dynamicTokens, keywords);
|
||||||
public alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
|
public alias str = tokenStringRepresentation!(IdType, operators, dynamicTokens, keywords);
|
||||||
public template tok(string token)
|
public template tok(string token)
|
||||||
{
|
{
|
||||||
alias tok = TokenId!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens, token);
|
alias tok = TokenId!(IdType, operators, dynamicTokens, keywords, token);
|
||||||
}
|
}
|
||||||
private enum extraFields = q{
|
private enum extraFields = q{
|
||||||
string comment;
|
string comment;
|
||||||
|
@ -405,8 +399,8 @@ public struct DLexer
|
||||||
{
|
{
|
||||||
import core.vararg;
|
import core.vararg;
|
||||||
|
|
||||||
mixin Lexer!(IdType, Token, lexIdentifier, staticTokens,
|
mixin Lexer!(IdType, Token, lexIdentifier, isSeparating, operators,
|
||||||
dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens);
|
dynamicTokens, pseudoTokenHandlers, keywords);
|
||||||
|
|
||||||
this(ubyte[] range, const LexerConfig config, StringCache* cache)
|
this(ubyte[] range, const LexerConfig config, StringCache* cache)
|
||||||
{
|
{
|
||||||
|
|
34
stdx/lexer.d
34
stdx/lexer.d
|
@ -11,13 +11,6 @@
|
||||||
|
|
||||||
module stdx.lexer;
|
module stdx.lexer;
|
||||||
|
|
||||||
import std.typecons;
|
|
||||||
import std.algorithm;
|
|
||||||
import std.range;
|
|
||||||
import std.traits;
|
|
||||||
import std.conv;
|
|
||||||
import std.math;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Template for determining the type used for a token type. Selects the smallest
|
* Template for determining the type used for a token type. Selects the smallest
|
||||||
* unsigned integral type that is able to hold the value
|
* unsigned integral type that is able to hold the value
|
||||||
|
@ -81,6 +74,7 @@ string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
|
||||||
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
|
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
|
||||||
alias possibleDefaultTokens, string symbol)
|
alias possibleDefaultTokens, string symbol)
|
||||||
{
|
{
|
||||||
|
import std.algorithm;
|
||||||
static if (symbol == "")
|
static if (symbol == "")
|
||||||
{
|
{
|
||||||
enum id = 0;
|
enum id = 0;
|
||||||
|
@ -190,10 +184,13 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
alias staticTokens, alias dynamicTokens, alias pseudoTokens,
|
alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
|
||||||
alias pseudoTokenHandlers, alias possibleDefaultTokens)
|
alias pseudoTokenHandlers, alias possibleDefaultTokens)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static assert (pseudoTokenHandlers.length % 2 == 0, "Each pseudo-token must"
|
||||||
|
~ " have a corresponding handler function name.");
|
||||||
|
|
||||||
static string generateMask(const ubyte[] arr)
|
static string generateMask(const ubyte[] arr)
|
||||||
{
|
{
|
||||||
import std.string;
|
import std.string;
|
||||||
|
@ -211,26 +208,28 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
return format("0x%016x", ulong.max >> ((8 - l) * 8));
|
return format("0x%016x", ulong.max >> ((8 - l) * 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
static string generateCaseStatements(string[] tokens)
|
static string generateCaseStatements()
|
||||||
{
|
{
|
||||||
import std.conv;
|
import std.conv;
|
||||||
import std.string;
|
import std.string;
|
||||||
|
import std.range;
|
||||||
|
|
||||||
|
string[] pseudoTokens = stupidToArray(pseudoTokenHandlers.stride(2));
|
||||||
|
string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
|
||||||
string code;
|
string code;
|
||||||
for (size_t i = 0; i < tokens.length; i++)
|
for (size_t i = 0; i < allTokens.length; i++)
|
||||||
{
|
{
|
||||||
size_t j = i + 1;
|
size_t j = i + 1;
|
||||||
size_t o = i;
|
size_t o = i;
|
||||||
while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++;
|
while (j < allTokens.length && allTokens[i][0] == allTokens[j][0]) j++;
|
||||||
code ~= format("case 0x%02x:\n", cast(ubyte) tokens[i][0]);
|
code ~= format("case 0x%02x:\n", cast(ubyte) allTokens[i][0]);
|
||||||
code ~= printCase(tokens[i .. j]);
|
code ~= printCase(allTokens[i .. j], pseudoTokens);
|
||||||
i = j - 1;
|
i = j - 1;
|
||||||
}
|
}
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
static string printCase(string[] tokens)
|
static string printCase(string[] tokens, string[] pseudoTokens)
|
||||||
{
|
{
|
||||||
string[] t = tokens;
|
string[] t = tokens;
|
||||||
string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
|
string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
|
||||||
|
@ -300,7 +299,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
// possible default
|
// possible default
|
||||||
if (token.length <= 8)
|
if (token.length <= 8)
|
||||||
{
|
{
|
||||||
code ~= " if (isSeparating(" ~ text(token.length) ~ "))\n";
|
code ~= " if (tokenSeparatingFunction(" ~ text(token.length) ~ "))\n";
|
||||||
code ~= " {\n";
|
code ~= " {\n";
|
||||||
code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||||
code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||||
|
@ -371,7 +370,7 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum tokenSearch = generateCaseStatements(stupidToArray(uniq(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens))));
|
enum tokenSearch = generateCaseStatements();
|
||||||
|
|
||||||
static ulong getFront(const ubyte[] arr) pure nothrow @trusted
|
static ulong getFront(const ubyte[] arr) pure nothrow @trusted
|
||||||
{
|
{
|
||||||
|
@ -625,6 +624,7 @@ private:
|
||||||
|
|
||||||
const(Item)* find(const(ubyte)[] bytes, uint hash) pure nothrow const @safe
|
const(Item)* find(const(ubyte)[] bytes, uint hash) pure nothrow const @safe
|
||||||
{
|
{
|
||||||
|
import std.algorithm;
|
||||||
immutable size_t index = hash % buckets.length;
|
immutable size_t index = hash % buckets.length;
|
||||||
for (const(Item)* item = buckets[index]; item !is null; item = item.next)
|
for (const(Item)* item = buckets[index]; item !is null; item = item.next)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue