Project restructuring
This commit is contained in:
parent
fc578105df
commit
84a0e706c2
|
@ -0,0 +1,4 @@
|
|||
[submodule "libdparse"]
|
||||
path = libdparse
|
||||
url = https://github.com/Hackerpilot/libdparse.git
|
||||
branch = master
|
|
@ -27,6 +27,7 @@ import analysis.opequals_without_tohash;
|
|||
|
||||
enum AnalyzerCheck : uint
|
||||
{
|
||||
none = 0b00000000_00000000,
|
||||
style_check = 0b00000000_00000001,
|
||||
enum_array_literal_check = 0b00000000_00000010,
|
||||
exception_check = 0b00000000_00000100,
|
||||
|
@ -76,12 +77,11 @@ string[] analyze(string fileName, ubyte[] code, AnalyzerCheck analyzers, bool st
|
|||
import std.parallelism;
|
||||
|
||||
auto lexer = byToken(code);
|
||||
auto app = appender!(typeof(lexer.front)[])();
|
||||
while (!lexer.empty)
|
||||
{
|
||||
app.put(lexer.front);
|
||||
lexer.popFront();
|
||||
}
|
||||
LexerConfig config;
|
||||
config.fileName = fileName;
|
||||
config.stringBehavior = StringBehavior.source;
|
||||
StringCache cache = StringCache(StringCache.defaultBucketCount);
|
||||
const(Token)[] tokens = getTokensForParser(code, config, &cache);
|
||||
|
||||
foreach (message; lexer.messages)
|
||||
{
|
||||
|
@ -90,7 +90,7 @@ string[] analyze(string fileName, ubyte[] code, AnalyzerCheck analyzers, bool st
|
|||
}
|
||||
|
||||
ParseAllocator p = new ParseAllocator;
|
||||
Module m = parseModule(app.data, fileName, p, &messageFunction);
|
||||
Module m = parseModule(tokens, fileName, p, &messageFunction);
|
||||
|
||||
if (!staticAnalyze)
|
||||
return null;
|
||||
|
|
|
@ -10,6 +10,8 @@ import std.d.lexer;
|
|||
import analysis.base;
|
||||
import std.container;
|
||||
|
||||
import std.stdio;
|
||||
|
||||
/**
|
||||
* Checks for unused variables.
|
||||
*/
|
||||
|
@ -53,7 +55,7 @@ class UnusedVariableCheck : BaseAnalyzer
|
|||
popScope();
|
||||
}
|
||||
|
||||
override void visit(const Type type) {}
|
||||
// override void visit(const Type type) {}
|
||||
|
||||
mixin template PartsUseVariables(NodeType)
|
||||
{
|
||||
|
@ -242,7 +244,6 @@ class UnusedVariableCheck : BaseAnalyzer
|
|||
{
|
||||
import std.algorithm;
|
||||
import std.array;
|
||||
// import std.stdio;
|
||||
if (parameter.name != tok!"")
|
||||
{
|
||||
// stderr.writeln("Adding parameter ", parameter.name.text);
|
||||
|
@ -280,7 +281,6 @@ class UnusedVariableCheck : BaseAnalyzer
|
|||
void variableDeclared(string name, size_t line, size_t column,
|
||||
bool isParameter, bool isRef)
|
||||
{
|
||||
// import std.stdio;
|
||||
if (inAggregateScope)
|
||||
return;
|
||||
// stderr.writeln("Adding ", name, " ", isParameter, " ", isRef);
|
||||
|
@ -289,6 +289,7 @@ class UnusedVariableCheck : BaseAnalyzer
|
|||
|
||||
void variableUsed(string name)
|
||||
{
|
||||
// writeln("Marking ", name, " used");
|
||||
size_t treeIndex = tree.length - 1;
|
||||
auto uu = UnUsed(name);
|
||||
while (true)
|
||||
|
|
|
@ -5,10 +5,10 @@
|
|||
|
||||
import std.d.lexer;
|
||||
import std.d.ast;
|
||||
import std.d.formatter;
|
||||
import std.stdio;
|
||||
import std.string;
|
||||
import std.array;
|
||||
import formatter;
|
||||
|
||||
template tagAndAccept(string tagName)
|
||||
{
|
||||
|
@ -372,6 +372,7 @@ class XMLPrinter : ASTVisitor
|
|||
{
|
||||
output.writeln("<declarator line=\"", declarator.name.line, "\">");
|
||||
output.writeln("<name>", declarator.name.text, "</name>");
|
||||
writeDdoc(declarator.comment);
|
||||
declarator.accept(this);
|
||||
output.writeln("</declarator>");
|
||||
}
|
||||
|
|
46
build.sh
46
build.sh
|
@ -1,46 +0,0 @@
|
|||
dmd\
|
||||
main.d\
|
||||
stats.d\
|
||||
imports.d\
|
||||
highlighter.d\
|
||||
ctags.d\
|
||||
astprinter.d\
|
||||
formatter.d\
|
||||
outliner.d\
|
||||
std/*.d\
|
||||
std/d/*.d\
|
||||
analysis/*.d\
|
||||
-version=DIP61\
|
||||
-ofdscanner\
|
||||
-g\
|
||||
-O -release -inline
|
||||
|
||||
#gdc\
|
||||
# main.d\
|
||||
# stats.d\
|
||||
# imports.d\
|
||||
# highlighter.d\
|
||||
# ctags.d\
|
||||
# astprinter.d\
|
||||
# formatter.d\
|
||||
# outliner.d\
|
||||
# std/*.d\
|
||||
# std/d/*.d\
|
||||
# analysis/*.d\
|
||||
# -O3 -frelease -fno-bounds-check\
|
||||
# -odscanner\
|
||||
|
||||
#ldc2\
|
||||
# main.d\
|
||||
# stats.d\
|
||||
# imports.d\
|
||||
# highlighter.d\
|
||||
# ctags.d\
|
||||
# astprinter.d\
|
||||
# formatter.d\
|
||||
# outliner.d\
|
||||
# std/*.d\
|
||||
# std/d/*.d\
|
||||
# analysis/*.d\
|
||||
# -O3 -release\
|
||||
# -oq -of=dscanner\
|
4
ctags.d
4
ctags.d
|
@ -20,13 +20,13 @@ void printCtags(File output, string[] fileNames)
|
|||
{
|
||||
string[] tags;
|
||||
LexerConfig config;
|
||||
StringCache* cache = new StringCache(StringCache.defaultBucketCount);
|
||||
StringCache cache = StringCache(StringCache.defaultBucketCount);
|
||||
foreach (fileName; fileNames)
|
||||
{
|
||||
File f = File(fileName);
|
||||
auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
|
||||
f.rawRead(bytes);
|
||||
auto tokens = byToken(bytes, config, cache);
|
||||
auto tokens = getTokensForParser(bytes, config, &cache);
|
||||
Module m = parseModule(tokens.array, fileName, null, &doNothing);
|
||||
auto printer = new CTagsPrinter;
|
||||
printer.fileName = fileName;
|
||||
|
|
3886
formatter.d
3886
formatter.d
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Subproject commit d9387eb3b275295cd0263bdc273c4b0b63f29f98
|
38
main.d
38
main.d
|
@ -102,18 +102,15 @@ int run(string[] args)
|
|||
return 1;
|
||||
}
|
||||
|
||||
StringCache* cache = new StringCache(StringCache.defaultBucketCount);
|
||||
StringCache cache = StringCache(StringCache.defaultBucketCount);
|
||||
|
||||
if (tokenDump || highlight)
|
||||
{
|
||||
bool usingStdin = args.length == 1;
|
||||
ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]);
|
||||
LexerConfig config;
|
||||
config.whitespaceBehavior = WhitespaceBehavior.include;
|
||||
config.stringBehavior = StringBehavior.source;
|
||||
config.commentBehavior = CommentBehavior.include;
|
||||
config.specialTokenBehavior = SpecialTokenBehavior.include;
|
||||
auto tokens = byToken(bytes, config, cache);
|
||||
auto tokens = byToken(bytes, config, &cache);
|
||||
if (highlight)
|
||||
{
|
||||
highlighter.highlight(tokens, args.length == 1 ? "stdin" : args[1]);
|
||||
|
@ -121,11 +118,11 @@ int run(string[] args)
|
|||
}
|
||||
else if (tokenDump)
|
||||
{
|
||||
writeln("text blank\tindex\tline\tcolumn\ttype");
|
||||
writeln("text blank\tindex\tline\tcolumn\ttype\tcomment");
|
||||
foreach (token; tokens)
|
||||
{
|
||||
writefln("<<%20s>>%b\t%d\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
|
||||
token.text !is null, token.index, token.line, token.column, token.type);
|
||||
writefln("<<%20s>>%b\t%d\t%d\t%d\t%d\t%s", token.text is null ? str(token.type) : token.text,
|
||||
token.text !is null, token.index, token.line, token.column, token.type, token.comment);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -150,10 +147,8 @@ int run(string[] args)
|
|||
if (usingStdin)
|
||||
{
|
||||
LexerConfig config;
|
||||
config.whitespaceBehavior = WhitespaceBehavior.skip;
|
||||
config.stringBehavior = StringBehavior.source;
|
||||
config.commentBehavior = CommentBehavior.attach;
|
||||
auto tokens = byToken(readStdin(), config, cache);
|
||||
auto tokens = byToken(readStdin(), config, &cache);
|
||||
if (tokenCount)
|
||||
printTokenCount(stdout, "stdin", tokens);
|
||||
else
|
||||
|
@ -166,10 +161,8 @@ int run(string[] args)
|
|||
{
|
||||
|
||||
LexerConfig config;
|
||||
config.whitespaceBehavior = WhitespaceBehavior.skip;
|
||||
config.stringBehavior = StringBehavior.source;
|
||||
config.commentBehavior = CommentBehavior.attach;
|
||||
auto tokens = byToken(readFile(f), config, cache);
|
||||
auto tokens = byToken(readFile(f), config, &cache);
|
||||
if (tokenCount)
|
||||
count += printTokenCount(stdout, f, tokens);
|
||||
else
|
||||
|
@ -180,9 +173,20 @@ int run(string[] args)
|
|||
}
|
||||
else if (imports || ast || outline)
|
||||
{
|
||||
auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
|
||||
auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1],
|
||||
null, &doNothing);
|
||||
string fileName = usingStdin ? "stdin" : args[1];
|
||||
LexerConfig config;
|
||||
config.fileName = fileName;
|
||||
config.stringBehavior = StringBehavior.source;
|
||||
auto tokens = getTokensForParser(
|
||||
usingStdin ? readStdin() : readFile(args[1]),
|
||||
config, &cache);
|
||||
// writeln("text blank\tindex\tline\tcolumn\ttype\tcomment");
|
||||
// foreach (token; tokens)
|
||||
// {
|
||||
// writefln("<<%20s>>%b\t%d\t%d\t%d\t%d\t%s", token.text is null ? str(token.type) : token.text,
|
||||
// token.text !is null, token.index, token.line, token.column, token.type, token.comment);
|
||||
// }
|
||||
auto mod = parseModule(tokens, fileName, null, &doNothing);
|
||||
if (imports)
|
||||
{
|
||||
auto visitor = new ImportPrinter;
|
||||
|
|
32
makefile
32
makefile
|
@ -1,9 +1,35 @@
|
|||
|
||||
.PHONY: all test
|
||||
|
||||
all:
|
||||
@./build.sh
|
||||
DMD = /home/brian/src/dmd/src/dmd
|
||||
GDC = gdc
|
||||
LDC = ldc
|
||||
SRC = main.d\
|
||||
stats.d\
|
||||
imports.d\
|
||||
highlighter.d\
|
||||
ctags.d\
|
||||
astprinter.d\
|
||||
outliner.d\
|
||||
libdparse/src/std/*.d\
|
||||
libdparse/src/std/d/*.d\
|
||||
analysis/*.d
|
||||
INCLUDE_PATHS = -Ilibdparse/src
|
||||
VERSIONS = -version=DIP61
|
||||
|
||||
all: dmdbuild
|
||||
|
||||
dmdbuild:
|
||||
${DMD} -O -release -inline -ofdscanner ${VERSIONS} ${INCLUDE_PATHS} ${SRC}
|
||||
|
||||
gdcbuild:
|
||||
${GDC} -O3 -frelease -odscanner ${VERSIONS} ${INCLUDE_PATHS} ${SRC}
|
||||
|
||||
ldcbuild:
|
||||
${LDC} -O5 -release -oq -of=dscanner ${VERSIONS} ${INCLUDE_PATHS} ${SRC}
|
||||
|
||||
test:
|
||||
@./test.sh
|
||||
|
||||
clean:
|
||||
rm -f dscanner *.o
|
||||
|
||||
|
|
|
@ -5,11 +5,11 @@
|
|||
|
||||
import std.d.lexer;
|
||||
import std.d.ast;
|
||||
import std.d.formatter;
|
||||
import std.stdio;
|
||||
import std.string;
|
||||
import std.array;
|
||||
import std.conv;
|
||||
import formatter;
|
||||
|
||||
class Outliner : ASTVisitor
|
||||
{
|
||||
|
|
4617
std/allocator.d
4617
std/allocator.d
File diff suppressed because it is too large
Load Diff
3151
std/d/ast.d
3151
std/d/ast.d
File diff suppressed because it is too large
Load Diff
2150
std/d/entities.d
2150
std/d/entities.d
File diff suppressed because it is too large
Load Diff
2188
std/d/lexer.d
2188
std/d/lexer.d
File diff suppressed because it is too large
Load Diff
7067
std/d/parser.d
7067
std/d/parser.d
File diff suppressed because it is too large
Load Diff
834
std/lexer.d
834
std/lexer.d
|
@ -1,834 +0,0 @@
|
|||
// Written in the D programming language
|
||||
|
||||
/**
|
||||
* $(H2 Summary)
|
||||
* This module contains a range-based compile-time _lexer generator.
|
||||
*
|
||||
* $(H2 Overview)
|
||||
* The _lexer generator consists of a template mixin, $(LREF Lexer), along with
|
||||
* several helper templates for generating such things as token identifiers.
|
||||
*
|
||||
* To write a _lexer using this API:
|
||||
* $(OL
|
||||
* $(LI Create the string array costants for your language.
|
||||
* $(UL
|
||||
* $(LI $(LINK2 #.staticTokens, staticTokens))
|
||||
* $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
|
||||
* $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
|
||||
* $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
|
||||
* ))
|
||||
* $(LI Create aliases for the various token and token identifier types
|
||||
* specific to your language.
|
||||
* $(UL
|
||||
* $(LI $(LREF TokenIdType))
|
||||
* $(LI $(LREF tokenStringRepresentation))
|
||||
* $(LI $(LREF TokenStructure))
|
||||
* $(LI $(LREF TokenId))
|
||||
* ))
|
||||
* $(LI Create a struct that mixes in the Lexer template mixin and
|
||||
* implements the necessary functions.
|
||||
* $(UL
|
||||
* $(LI $(LREF Lexer))
|
||||
* ))
|
||||
* )
|
||||
* Examples:
|
||||
* $(UL
|
||||
* $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/std/d/lexer.d, here).)
|
||||
* $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
|
||||
* $(LI A _lexer for JSON is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/jsonlexer.d, here).)
|
||||
* )
|
||||
* $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
|
||||
* $(DL
|
||||
* $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction)
|
||||
* $(DD A function that serves as the default token lexing function. For most
|
||||
* languages this will be the identifier lexing function.))
|
||||
* $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
|
||||
* $(DD A function that is able to determine if an identifier/keyword has come
|
||||
* to an end. This function must return bool and take a single size_t
|
||||
* argument representing the number of bytes to skip over before looking for
|
||||
* a separating character.)
|
||||
* $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
|
||||
* $(DD A listing of the tokens whose exact value never changes and which cannot
|
||||
* possibly be a token handled by the default token lexing function. The
|
||||
* most common example of this kind of token is an operator such as
|
||||
* $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
|
||||
* $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
|
||||
* $(DD A listing of tokens whose value is variable, such as whitespace,
|
||||
* identifiers, number literals, and string literals.)
|
||||
* $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
|
||||
* $(DD A listing of tokens that could posibly be one of the tokens handled by
|
||||
* the default token handling function. An common example of this is
|
||||
* a keyword such as $(D_STRING "for"), which looks like the beginning of
|
||||
* the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
|
||||
* called to determine if the character after the $(D_STRING 'r') separates
|
||||
* the identifier, indicating that the token is $(D_STRING "for"), or if
|
||||
* lexing should be turned over to the $(B defaultTokenFunction).)
|
||||
* $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
|
||||
* $(DD A mapping of prefixes to custom token handling function names. The
|
||||
* generated _lexer will search for the even-index elements of this array,
|
||||
* and then call the function whose name is the element immedately after the
|
||||
* even-indexed element. This is used for lexing complex tokens whose prefix
|
||||
* is fixed.)
|
||||
* )
|
||||
*
|
||||
* Here are some example constants for a simple calculator _lexer:
|
||||
* ---
|
||||
* // There are a near infinite number of valid number literals, so numbers are
|
||||
* // dynamic tokens.
|
||||
* enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
|
||||
*
|
||||
* // The operators are always the same, and cannot start a numberLiteral, so
|
||||
* // they are staticTokens
|
||||
* enum string[] staticTokens = ["-", "+", "*", "/"];
|
||||
*
|
||||
* // In this simple example there are no keywords or other tokens that could
|
||||
* // look like dynamic tokens, so this is blank.
|
||||
* enum string[] possibleDefaultTokens = [];
|
||||
*
|
||||
* // If any whitespace character or digit is encountered, pass lexing over to
|
||||
* // our custom handler functions. These will be demonstrated in an example
|
||||
* // later on.
|
||||
* enum string[] tokenHandlers = [
|
||||
* "0", "lexNumber",
|
||||
* "1", "lexNumber",
|
||||
* "2", "lexNumber",
|
||||
* "3", "lexNumber",
|
||||
* "4", "lexNumber",
|
||||
* "5", "lexNumber",
|
||||
* "6", "lexNumber",
|
||||
* "7", "lexNumber",
|
||||
* "8", "lexNumber",
|
||||
* "9", "lexNumber",
|
||||
* " ", "lexWhitespace",
|
||||
* "\n", "lexWhitespace",
|
||||
* "\t", "lexWhitespace",
|
||||
* "\r", "lexWhitespace"
|
||||
* ];
|
||||
* ---
|
||||
*
|
||||
* Copyright: Brian Schott 2013
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
||||
* Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
|
||||
* Source: $(PHOBOSSRC std/_lexer.d)
|
||||
*/
|
||||
|
||||
module std.lexer;
|
||||
|
||||
/**
|
||||
* Template for determining the type used for a token type. Selects the smallest
|
||||
* unsigned integral type that is able to hold the value
|
||||
* staticTokens.length + dynamicTokens.length + possibleDefaultTokens.length.
|
||||
* For example if there are 20 static tokens, 30 dynamic tokens,
|
||||
* and 10 possible default tokens, this template will alias itself to ubyte,
|
||||
* as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
|
||||
* Examples:
|
||||
* ---
|
||||
* // In our calculator example this means that IdType is an alias for ubyte.
|
||||
* alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
* ---
|
||||
*/
|
||||
template TokenIdType(alias staticTokens, alias dynamicTokens,
|
||||
alias possibleDefaultTokens)
|
||||
{
|
||||
immutable tokenCount = staticTokens.length + dynamicTokens.length
|
||||
+ possibleDefaultTokens.length + 1;
|
||||
static if (tokenCount <= ubyte.max)
|
||||
alias TokenIdType = ubyte;
|
||||
else static if (tokenCount <= ushort.max)
|
||||
alias TokenIdType = ushort;
|
||||
else static if (tokenCount <= uint.max)
|
||||
alias TokenIdType = uint;
|
||||
else
|
||||
static assert (false, "The number of tokens must be less than uint.max");
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks up the string representation of the given token type. This is the
|
||||
* opposite of the function of the TokenId template.
|
||||
* Params: type = the token type identifier
|
||||
* Examples:
|
||||
* ---
|
||||
* alias str = tokenStringRepresentation(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
|
||||
* assert (str(tok!"*") == "*");
|
||||
* ---
|
||||
* See_also: $(LREF TokenId)
|
||||
*/
|
||||
string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens,
|
||||
alias possibleDefaultTokens)(IdType type) @property
|
||||
{
|
||||
enum tokens = staticTokens ~ dynamicTokens ~ possibleDefaultTokens;
|
||||
|
||||
if (type == 0)
|
||||
return "!ERROR!";
|
||||
else if (type < tokens.length + 1)
|
||||
return tokens[type - 1];
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
/// Fix https://github.com/Hackerpilot/Dscanner/issues/96
|
||||
alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
|
||||
enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
|
||||
alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);
|
||||
|
||||
static assert (str(tok!"foo") == "foo");
|
||||
static assert (str(tok!"bar") == "bar");
|
||||
static assert (str(tok!"doo") == "doo");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the token type identifier for the given symbol. There are two
|
||||
* special cases:
|
||||
* $(UL
|
||||
* $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
|
||||
* $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the maximum
|
||||
* valid token type identifier)
|
||||
* )
|
||||
* In all cases this template will alias itself to a constant of type IdType.
|
||||
* This template will fail at compile time if $(D_PARAM symbol) is not one of
|
||||
* the staticTokens, dynamicTokens, or possibleDefaultTokens.
|
||||
* Examples:
|
||||
* ---
|
||||
* template tok(string symbol)
|
||||
* {
|
||||
* alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
|
||||
* possibleDefaultTokens, symbol);
|
||||
* }
|
||||
* // num and plus are of type ubyte.
|
||||
* IdType plus = tok!"+";
|
||||
* IdType num = tok!"numberLiteral";
|
||||
* ---
|
||||
*/
|
||||
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
|
||||
alias possibleDefaultTokens, string symbol)
|
||||
{
|
||||
enum tokens = staticTokens ~ dynamicTokens ~ possibleDefaultTokens;
|
||||
|
||||
import std.algorithm;
|
||||
static if (symbol == "")
|
||||
{
|
||||
enum id = 0;
|
||||
alias TokenId = id;
|
||||
}
|
||||
else static if (symbol == "\0")
|
||||
{
|
||||
enum id = 1 + tokens.length;
|
||||
alias TokenId = id;
|
||||
}
|
||||
else
|
||||
{
|
||||
enum i = tokens.countUntil(symbol);
|
||||
static if (i != -1)
|
||||
{
|
||||
enum id = i + 1;
|
||||
static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol);
|
||||
alias TokenId = id;
|
||||
}
|
||||
else
|
||||
static assert (0, "Invalid token: " ~ symbol);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The token that is returned by the lexer.
|
||||
* Params:
|
||||
* IdType = The D type of the "type" token type field.
|
||||
* extraFields = A string containing D code for any extra fields that should
|
||||
* be included in the token structure body. This string is passed
|
||||
* directly to a mixin statement.
|
||||
* Examples:
|
||||
* ---
|
||||
* // No extra struct fields are desired in this example, so leave it blank.
|
||||
* alias Token = TokenStructure!(IdType, "");
|
||||
* Token minusToken = Token(tok!"-");
|
||||
* ---
|
||||
*/
|
||||
struct TokenStructure(IdType, string extraFields = "")
|
||||
{
|
||||
public:
|
||||
|
||||
bool opEquals(ref const typeof(this) other) const pure nothrow @safe
|
||||
{
|
||||
return this.type == other.type && this.text == other.text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returs: true if the token has the given type, false otherwise.
|
||||
*/
|
||||
bool opEquals(IdType type) const pure nothrow @safe
|
||||
{
|
||||
return this.type == type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a token from a token type.
|
||||
* Params: type = the token type
|
||||
*/
|
||||
this(IdType type)
|
||||
{
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a token.
|
||||
* Params:
|
||||
* type = the token type
|
||||
* text = the text of the token, which may be null
|
||||
* line = the line number at which this token occurs
|
||||
* column = the column number at which this token occurs
|
||||
* index = the byte offset from the beginning of the input at which this
|
||||
* token occurs
|
||||
*/
|
||||
this(IdType type, string text, size_t line, size_t column, size_t index)
|
||||
{
|
||||
this.text = text;
|
||||
this.line = line;
|
||||
this.column = column;
|
||||
this.type = type;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
/**
|
||||
* The _text of the token.
|
||||
*/
|
||||
string text;
|
||||
|
||||
/**
|
||||
* The _line number at which this token occurs.
|
||||
*/
|
||||
size_t line;
|
||||
|
||||
/**
|
||||
* The _column number at which this token occurs. This is measured in bytes
|
||||
* and may not be correct when tab characters are involved.
|
||||
*/
|
||||
size_t column;
|
||||
|
||||
/**
|
||||
* The byte offset from the beginning of the input at which this token
|
||||
* occurs.
|
||||
*/
|
||||
size_t index;
|
||||
|
||||
/**
|
||||
* The token type.
|
||||
*/
|
||||
IdType type;
|
||||
|
||||
mixin (extraFields);
|
||||
}
|
||||
|
||||
/**
|
||||
* The implementation of the _lexer is contained within this mixin template.
|
||||
* To use it, this template should be mixed in to a struct that represents the
|
||||
* _lexer for your language. This struct should implement the following methods:
|
||||
* $(UL
|
||||
* $(LI popFront, which should call this mixin's _popFront() and
|
||||
* additionally perform any token filtering or shuffling you deem
|
||||
* necessary. For example, you can implement popFront to skip comment or
|
||||
* tokens.)
|
||||
* $(LI A function that serves as the default token lexing function. For
|
||||
* most languages this will be the identifier lexing function. This
|
||||
* should then be passed to the $(LREF Lexer) template mixin as the
|
||||
* $(LINK2 #.defaultTokenFunction defaultTokenFunction) template
|
||||
* parameter).
|
||||
* $(LI A function that is able to determine if an identifier/keyword has
|
||||
* come to an end. This function must return $(D_KEYWORD bool) and take
|
||||
* a single $(D_KEYWORD size_t) argument representing the number of
|
||||
* bytes to skip over before looking for a separating character.)
|
||||
* $(LI Any functions referred to in the tokenHandlers template paramater.
|
||||
* These functions must be marked $(D_KEYWORD pure nothrow), take no
|
||||
* arguments, and return a token)
|
||||
* $(LI A constructor that initializes the range field as well as calls
|
||||
* popFront() exactly once (to initialize the _front field).)
|
||||
* )
|
||||
* Params:
|
||||
* Token = $(LREF TokenStructure)
|
||||
* defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
|
||||
* tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
|
||||
* staticTokens = $(LINK2 #.staticTokens, staticTokens)
|
||||
* dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
|
||||
* possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
|
||||
* tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
|
||||
* Examples:
|
||||
* ---
|
||||
* struct CalculatorLexer
|
||||
* {
|
||||
* mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
|
||||
* staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
|
||||
*
|
||||
* this (ubyte[] bytes)
|
||||
* {
|
||||
* this.range = LexerRange(bytes);
|
||||
* popFront();
|
||||
* }
|
||||
*
|
||||
* void popFront() pure
|
||||
* {
|
||||
* _popFront();
|
||||
* }
|
||||
*
|
||||
* Token lexNumber() pure nothrow @safe
|
||||
* {
|
||||
* // implementation goes here
|
||||
* }
|
||||
*
|
||||
* Token lexWhitespace() pure nothrow @safe
|
||||
* {
|
||||
* // implementation goes here
|
||||
* }
|
||||
*
|
||||
* Token defaultTokenFunction() pure nothrow @safe
|
||||
* {
|
||||
* // There is no default token in the example calculator language, so
|
||||
* // this is always an error.
|
||||
* range.popFront();
|
||||
* return Token(tok!"");
|
||||
* }
|
||||
*
|
||||
* bool isSeparating(size_t offset) pure nothrow @safe
|
||||
* {
|
||||
* // For this example language, always return true.
|
||||
* return true;
|
||||
* }
|
||||
* }
|
||||
* ---
|
||||
*/
|
||||
mixin template Lexer(Token, alias defaultTokenFunction,
|
||||
alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
|
||||
alias possibleDefaultTokens, alias tokenHandlers)
|
||||
{
|
||||
private alias _IDType = typeof(Token.type);
|
||||
private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);
|
||||
|
||||
static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
|
||||
~ " have a corresponding handler function name.");
|
||||
|
||||
static string generateMask(const ubyte[] arr)
|
||||
{
|
||||
import std.string;
|
||||
ulong u;
|
||||
for (size_t i = 0; i < arr.length && i < 8; i++)
|
||||
{
|
||||
u |= (cast(ulong) arr[i]) << (i * 8);
|
||||
}
|
||||
return format("0x%016x", u);
|
||||
}
|
||||
|
||||
private static string generateByteMask(size_t l)
|
||||
{
|
||||
import std.string;
|
||||
return format("0x%016x", ulong.max >> ((8 - l) * 8));
|
||||
}
|
||||
|
||||
private static size_t calcSplitCount(size_t a, size_t b) pure nothrow
|
||||
{
|
||||
int i;
|
||||
while (true)
|
||||
{
|
||||
i++;
|
||||
a /= 2;
|
||||
if (a < b)
|
||||
break;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
private static char[] getBeginningChars(string[] allTokens)
|
||||
{
|
||||
char[] beginningChars;
|
||||
for (size_t i = 0; i < allTokens.length; i++)
|
||||
{
|
||||
if (allTokens[i].length == 0)
|
||||
continue;
|
||||
beginningChars ~= allTokens[i][0];
|
||||
size_t j = i + 1;
|
||||
while (j < allTokens.length && allTokens[i][0] == allTokens[j][0])
|
||||
j++;
|
||||
i = j - 1;
|
||||
}
|
||||
return beginningChars;
|
||||
}
|
||||
|
||||
private static string generateStatements()
|
||||
{
|
||||
import std.algorithm;
|
||||
import std.conv;
|
||||
import std.string;
|
||||
import std.range;
|
||||
|
||||
string[] pseudoTokens = stupidToArray(tokenHandlers.stride(2));
|
||||
string[] allTokens = stupidToArray(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq);
|
||||
// Array consisting of a sorted list of the first characters of the
|
||||
// tokens.
|
||||
char[] beginningChars = getBeginningChars(allTokens);
|
||||
size_t i = calcSplitCount(beginningChars.length, 8);
|
||||
return generateStatementsStep(allTokens, pseudoTokens, beginningChars, i);
|
||||
}
|
||||
|
||||
private static string generateStatementsStep(string[] allTokens,
|
||||
string[] pseudoTokens, char[] chars, size_t i, string indent = "")
|
||||
{
|
||||
import std.string;
|
||||
string code;
|
||||
if (i > 0)
|
||||
{
|
||||
size_t p = chars.length / 2;
|
||||
code ~= indent ~ format("if (f < 0x%02x) // %s \n%s{\n", chars[p], chars[p], indent);
|
||||
code ~= generateStatementsStep(allTokens, pseudoTokens, chars[0 .. p], i - 1, indent ~ " ");
|
||||
code ~= indent ~ "}\n" ~ indent ~ "else\n" ~ indent ~ "{\n";
|
||||
code ~= generateStatementsStep(allTokens, pseudoTokens, chars[p .. $], i - 1, indent ~ " ");
|
||||
code ~= indent ~ "}\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
code ~= indent ~ "switch (f)\n" ~ indent ~ "{\n";
|
||||
foreach (char c; chars)
|
||||
{
|
||||
size_t begin;
|
||||
size_t end;
|
||||
for (size_t j = 0; j < allTokens.length; j++)
|
||||
{
|
||||
if (allTokens[j].length == 0 || allTokens[j][0] != c)
|
||||
continue;
|
||||
begin = j;
|
||||
end = j + 1;
|
||||
while (end < allTokens.length && allTokens[begin][0] == allTokens[end][0])
|
||||
end++;
|
||||
break;
|
||||
}
|
||||
code ~= format("%scase 0x%02x:\n", indent, c);
|
||||
code ~= printCase(allTokens[begin .. end], pseudoTokens, indent ~ " ");
|
||||
}
|
||||
code ~= indent ~ "default: goto _defaultTokenFunction;\n";
|
||||
code ~= indent ~ "}\n";
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
private static string printCase(string[] tokens, string[] pseudoTokens, string indent)
|
||||
{
|
||||
import std.algorithm;
|
||||
string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(tokens));
|
||||
import std.conv;
|
||||
|
||||
if (tokens.length == 1 && tokens[0].length == 1)
|
||||
{
|
||||
if (pseudoTokens.countUntil(tokens[0]) >= 0)
|
||||
{
|
||||
return indent ~ "return "
|
||||
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
else if (staticTokens.countUntil(tokens[0]) >= 0)
|
||||
{
|
||||
return indent ~ "range.popFront();\n"
|
||||
~ indent ~ "return Token(_tok!\"" ~ escape(tokens[0]) ~ "\", null, line, column, index);\n";
|
||||
}
|
||||
else if (pseudoTokens.countUntil(tokens[0]) >= 0)
|
||||
{
|
||||
return indent ~ "return "
|
||||
~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
}
|
||||
|
||||
string code;
|
||||
|
||||
foreach (i, token; sortedTokens)
|
||||
{
|
||||
immutable mask = generateMask(cast (const ubyte[]) token);
|
||||
if (token.length >= 8)
|
||||
code ~= indent ~ "if (frontBytes == " ~ mask ~ ")\n";
|
||||
else
|
||||
code ~= indent ~ "if ((frontBytes & " ~ generateByteMask(token.length) ~ ") == " ~ mask ~ ")\n";
|
||||
code ~= indent ~ "{\n";
|
||||
if (pseudoTokens.countUntil(token) >= 0)
|
||||
{
|
||||
if (token.length <= 8)
|
||||
{
|
||||
code ~= indent ~ " return "
|
||||
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
code ~= indent ~ " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n";
|
||||
code ~= indent ~ " return "
|
||||
~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
|
||||
~ "();\n";
|
||||
}
|
||||
}
|
||||
else if (staticTokens.countUntil(token) >= 0)
|
||||
{
|
||||
if (token.length <= 8)
|
||||
{
|
||||
code ~= indent ~ " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||
code ~= indent ~ " return Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
code ~= indent ~ " pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n";
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// possible default
|
||||
if (token.length <= 8)
|
||||
{
|
||||
code ~= indent ~ " if (tokenSeparatingFunction(" ~ text(token.length) ~ "))\n";
|
||||
code ~= indent ~ " {\n";
|
||||
code ~= indent ~ " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||
code ~= indent ~ " return Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||
code ~= indent ~ " }\n";
|
||||
code ~= indent ~ " else\n";
|
||||
code ~= indent ~ " goto _defaultTokenFunction;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
code ~= indent ~ " if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\" && isSeparating(" ~ text(token.length) ~ "))\n";
|
||||
code ~= indent ~ " {\n";
|
||||
code ~= indent ~ " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||
code ~= indent ~ " return Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||
code ~= indent ~ " }\n";
|
||||
code ~= indent ~ " else\n";
|
||||
code ~= indent ~ " goto _defaultTokenFunction;\n";
|
||||
}
|
||||
}
|
||||
code ~= indent ~ "}\n";
|
||||
}
|
||||
code ~= indent ~ "else\n";
|
||||
code ~= indent ~ " goto _defaultTokenFunction;\n";
|
||||
return code;
|
||||
}
|
||||
|
||||
/**
 * Range primitive _front.
 *
 * Returns: a reference to the token currently at the head of the token
 *     range (the most recently lexed token).
 */
ref const(Token) front() pure nothrow const @property
{
    return _front;
}
|
||||
|
||||
/**
 * Lexes the next token and makes it the current one by storing the
 * result of advance() in the _front member.
 */
void _popFront() pure
{
    _front = advance();
}
|
||||
|
||||
/**
 * Range primitive _empty.
 *
 * Returns: true once the end-of-input marker token has become the
 *     current token.
 */
bool empty() pure const nothrow @property
{
    return _front.type == _tok!"\0";
}
|
||||
|
||||
/**
 * Escapes a string so that it can be embedded in generated D source as
 * the contents of a double-quoted string literal.
 *
 * Params:
 *     input = the raw token text
 * Returns: $(D input) with backslashes, quotes, and the control
 *     characters tab, newline, carriage return, and NUL replaced by
 *     their escape sequences; all other bytes are copied through as-is
 */
static string escape(string input)
{
    string retVal;
    foreach (ubyte c; cast(ubyte[]) input)
    {
        switch (c)
        {
        case '\\': retVal ~= `\\`; break;
        case '"': retVal ~= `\"`; break;
        case '\'': retVal ~= `\'`; break;
        case '\t': retVal ~= `\t`; break;
        case '\n': retVal ~= `\n`; break;
        case '\r': retVal ~= `\r`; break;
        // A raw NUL byte would corrupt the generated source file; the
        // lexer's "\0" end-of-input token must round-trip through the
        // emitted literal as the two characters backslash-zero.
        case '\0': retVal ~= `\0`; break;
        default: retVal ~= c; break;
        }
    }
    return retVal;
}
|
||||
|
||||
// The real std.array.array() cannot run at compile time, so this hand
// rolled eager copy is used for CTFE instead.
static string[] stupidToArray(R)(R range)
{
    string[] result;
    foreach (element; range)
        result ~= element;
    return result;
}
|
||||
|
||||
// Compile-time generated token-dispatch code, built once from the token
// lists and mixed into advance().
enum tokenSearch = generateStatements();
|
||||
|
||||
/**
 * Packs up to the first eight bytes of $(D arr) into a ulong so that the
 * generated token-dispatch code can compare them against precomputed
 * masks in a single integer operation.
 *
 * Params:
 *     arr = the bytes to pack; assumed to hold between 1 and 8 elements —
 *           a longer slice would make the shift below exceed 63 (TODO
 *           confirm callers only ever pass range.peek(7))
 * Returns: the packed bytes with the unused high bytes zeroed
 */
static ulong getFront(const ubyte[] arr) pure nothrow @trusted
{
    // NOTE(review): this dereference always reads a full 8 bytes from
    // arr.ptr, even when arr.length < 8, so the bytes past the slice must
    // be addressable — presumably guaranteed by the underlying input
    // buffer; verify against the callers before tightening @trusted.
    immutable importantBits = *(cast (ulong*) arr.ptr);
    // Keep only the low arr.length bytes; on a little-endian target these
    // are the leading bytes of the input.
    immutable filler = ulong.max >> ((8 - arr.length) * 8);
    return importantBits & filler;
}
|
||||
|
||||
/**
 * Lexes a single token at the range's current position.
 *
 * Returns: the next token, or an end-of-input token (type _tok!"\0")
 *     when the input range is exhausted.
 */
Token advance() pure
{
    if (range.empty)
        return Token(_tok!"\0");
    // Capture the position before the generated code consumes any bytes,
    // so the returned token points at its own first byte.
    immutable size_t index = range.index;
    immutable size_t column = range.column;
    immutable size_t line = range.line;
    // Up to 8 leading bytes packed into one ulong for the mask
    // comparisons performed by the generated dispatch code below.
    immutable ulong frontBytes = getFront(range.peek(7));
    ubyte f = frontBytes & 0xff;
    // pragma(msg, tokenSearch);
    // Compile-time generated token search; it returns directly on a
    // match and otherwise jumps to the fall-back label below.
    mixin(tokenSearch);
    _defaultTokenFunction:
    return defaultTokenFunction();
}
|
||||
|
||||
/**
 * The lexer input, wrapped with position/line/column tracking.
 */
LexerRange range;

/**
 * The token that is currently at the front of the range; advance()
 * stores each newly lexed token here via _popFront().
 */
Token _front;
|
||||
}
|
||||
|
||||
/**
 * Range structure that wraps the _lexer's input.
 *
 * Tracks the current byte offset together with the _line and _column
 * numbers used when constructing tokens.
 */
struct LexerRange
{
    /**
     * Params:
     *     bytes = the _lexer input
     *     index = the initial offset from the beginning of $(D_PARAM bytes)
     *     column = the initial _column number
     *     line = the initial _line number
     */
    this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
    {
        this.bytes = bytes;
        this.index = index;
        this.column = column;
        this.line = line;
    }

    /**
     * Returns: a mark at the current position that can then be used with
     *     slice or seek.
     */
    size_t mark() const nothrow pure @safe
    {
        return index;
    }

    /**
     * Sets the range to the given position.
     * Params: m = the position to seek to
     */
    void seek(size_t m) nothrow pure @safe
    {
        index = m;
    }

    /**
     * Returns a slice of the input byte array between the given mark and
     * the current position.
     * Params: m = the beginning index of the slice to return
     */
    const(ubyte)[] slice(size_t m) const nothrow pure @safe
    {
        return bytes[m .. index];
    }

    /**
     * Implements the range primitive _empty.
     */
    bool empty() const nothrow pure @safe
    {
        return index >= bytes.length;
    }

    /**
     * Implements the range primitive _front.
     * Note: no bounds check is performed; callers must check empty() first.
     */
    ubyte front() const nothrow pure @safe
    {
        return bytes[index];
    }

    /**
     * Returns: the current item as well as the items $(D_PARAM p) items
     *     ahead. The result is truncated at the end of the input, so it
     *     may contain fewer than p + 1 bytes.
     */
    const(ubyte)[] peek(size_t p) const nothrow pure @safe
    {
        return index + p + 1 > bytes.length
            ? bytes[index .. $]
            : bytes[index .. index + p + 1];
    }

    /**
     * Returns: the byte $(D_PARAM offset) positions past the current one.
     * Note: no bounds check is performed; use canPeek() first.
     */
    ubyte peekAt(size_t offset) const nothrow pure @safe
    {
        return bytes[index + offset];
    }

    /**
     * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead.
     */
    bool canPeek(size_t p) const nothrow pure @safe
    {
        return index + p < bytes.length;
    }

    /**
     * Implements the range primitive _popFront. Advances one byte and
     * increments the column counter; newlines are not detected here —
     * callers account for them with incrementLine().
     */
    void popFront() pure nothrow @safe
    {
        index++;
        column++;
    }

    /**
     * Implements the algorithm _popFrontN more efficiently. This function
     * does not detect or handle newlines.
     */
    void popFrontN(size_t n) pure nothrow @safe
    {
        index += n;
        column += n;
    }

    /**
     * Increments the range's line number and resets the column counter.
     * Params: i = the number of lines advanced over
     */
    void incrementLine(size_t i = 1) pure nothrow @safe
    {
        column = 1;
        line += i;
    }

    /**
     * The input _bytes.
     */
    const(ubyte)[] bytes;

    /**
     * The range's current position (byte offset into bytes).
     */
    size_t index;

    /**
     * The current _column number (1-based; reset by incrementLine).
     */
    size_t column;

    /**
     * The current _line number (1-based).
     */
    size_t line;
}
|
||||
|
Loading…
Reference in New Issue