diff --git a/autocomplete.d b/autocomplete.d
deleted file mode 100644
index b408fdc..0000000
--- a/autocomplete.d
+++ /dev/null
@@ -1,378 +0,0 @@
-// Copyright Brian Schott (Sir Alaran) 2012.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-
-module autocomplete;
-
-import std.range;
-import std.algorithm;
-import std.array;
-import std.conv;
-import std.stdio;
-import std.typecons;
-import std.path;
-import std.file;
-import std.d.lexer;
-
-import parser;
-import langutils;
-import types;
-
-immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD",
-	"Cygwin", "D_Coverage", "D_Ddoc", "DigitalMars", "D_InlineAsm_X86",
-	"D_InlineAsm_X86_64", "D_LP64", "D_NET", "D_PIC", "D_Version2",
-	"FreeBSD", "GNU", "HPPA", "HPPA64", "Hurd", "IA64", "LDC", "linux",
-	"LittleEndian", "MinGW", "MIPS", "MIPS64", "none", "OpenBSD", "OSX",
-	"Posix", "PPC", "PPC64", "S390", "S390X", "SDC", "SH", "SH64", "SkyOS",
-	"Solaris", "SPARC", "SPARC64", "SysV3", "SysV4", "unittest", "Win32",
-	"Win64", "Windows", "X86", "X86_64"
-];
-
-immutable string[] scopes = ["exit", "failure", "success"];
-
-///**
-// * Returns: indicies into the token array
-// */
-//size_t findEndOfExpression(const Token[] tokens, const size_t index)
-//out (result)
-//{
-//	assert (result < tokens.length);
-//	assert (result >= index);
-//}
-//body
-//{
-//	size_t i = index;
-//	loop: while (i < tokens.length)
-//	{
-//		switch (tokens[i].type)
-//		{
-//		case TokenType.Return:
-//		case TokenType.New:
-//		case TokenType.Delete:
-//		case TokenType.Comma:
-//		case TokenType.RBrace:
-//		case TokenType.RParen:
-//		case TokenType.RBracket:
-//		case TokenType.Semicolon:
-//			break loop;
-//		case TokenType.LParen:
-//			skipParens(tokens, i);
-//			break;
-//		case TokenType.LBrace:
-//			skipBraces(tokens, i);
-//			break;
-//		case TokenType.LBracket:
-//			skipBrackets(tokens, i);
-//			break;
-//		default:
-//			++i;
-//			break;
-//		}
-//	}
-//	return i;
-//}
-//
-//size_t findBeginningOfExpression(const Token[] tokens, const size_t index)
-//in
-//{
-//	assert (index < tokens.length);
-//	assert (tokens.length > 0);
-//}
-//out (result)
-//{
-//	import std.string;
-//	assert (result < tokens.length);
-//	assert (result <= index, format("findBeginningOfExpression %d, %d", result, index));
-//}
-//body
-//{
-//	size_t i = index;
-//	loop: while (i < tokens.length)
-//	{
-//		switch (tokens[i].type)
-//		{
-//		case TokenType.Assign: case TokenType.BitAnd: case TokenType.BitAndEquals:
-//		case TokenType.BitOr: case TokenType.BitOrEquals: case TokenType.CatEquals:
-//		case TokenType.Colon: case TokenType.Comma: case TokenType.Decrement:
-//		case TokenType.Div: case TokenType.DivEquals: case TokenType.Dollar:
-//		case TokenType.Equals: case TokenType.GoesTo:
-//		case TokenType.Greater: case TokenType.GreaterEqual: case TokenType.Hash:
-//		case TokenType.Increment: case TokenType.LBrace: case TokenType.LBracket:
-//		case TokenType.Less: case TokenType.LessEqual: case TokenType.LessEqualGreater:
-//		case TokenType.LessOrGreater: case TokenType.LogicAnd: case TokenType.LogicOr:
-//		case TokenType.LParen: case TokenType.Minus: case TokenType.MinusEquals:
-//		case TokenType.Mod: case TokenType.ModEquals: case TokenType.MulEquals:
-//		case TokenType.Not: case TokenType.NotEquals: case TokenType.NotGreater:
-//		case TokenType.NotGreaterEqual: case TokenType.NotLess: case TokenType.NotLessEqual:
-//		case TokenType.NotLessEqualGreater: case
TokenType.Plus: case TokenType.PlusEquals: -// case TokenType.Pow: case TokenType.PowEquals: case TokenType.RBrace: -// case TokenType.Semicolon: case TokenType.ShiftLeft: case TokenType.ShiftLeftEqual: -// case TokenType.ShiftRight: case TokenType.ShiftRightEqual: case TokenType.Slice: -// case TokenType.Star: case TokenType.Ternary: case TokenType.Tilde: -// case TokenType.Unordered: case TokenType.UnsignedShiftRight: case TokenType.UnsignedShiftRightEqual: -// case TokenType.Vararg: case TokenType.Xor: case TokenType.XorEquals: -// case TokenType.KEYWORDS_BEGIN: .. case TokenType.KEYWORDS_END: -// return i + 1; -// case TokenType.RParen: -// if (i == 0) -// break loop; -// skipParens(tokens, i); -// break; -// case TokenType.RBracket: -// if (i == 0) -// break loop; -// skipBrackets(tokens, i); -// break; -// default: -// if (i == 0) -// break loop; -// i--; -// break; -// } -// } -// return i + 1; -//} -// -//const(Token)[] splitCallChain(const(Token)[] tokens) -//{ -// auto app = appender!(Token[])(); -// size_t i = 0; -// while (i < tokens.length) -// { -// app.put(tokens[i++]); -// while (i < tokens.length && tokens[i] == TokenType.LParen) skipParens(tokens, i); -// while (i < tokens.length && tokens[i] == TokenType.LBracket) skipBrackets(tokens, i); -// while (i < tokens.length && tokens[i] == TokenType.Dot) ++i; -// } -// return app.data; -//} -// -//unittest -//{ -// auto code = `a.b[10].c("grcl").x`; -// auto tokens = tokenize(code); -// assert (splitCallChain(tokens) == ["a", "b", "c", "x"]); -//} -// -//struct AutoComplete -//{ -// this(const (Token)[] tokens, CompletionContext context) -// { -// this.tokens = tokens; -// this.context = context; -// } -// -// string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor) -// { -// stderr.writeln("getting type of ", expression); -// if (expression.length == 0) -// return "void"; -// auto type = typeOfVariable(expression[0], cursor); -// if (type is null) -// return "void"; -// size_t index = 1; -// while (index < expression.length) -// { -// const Tuple!(string, string)[string] typeMap = context.getMembersOfType( -// type); -// const Tuple!(string, string)* memberType = expression[index].value in typeMap; -// if (memberType is null) -// return "void"; -// else -// type = (*memberType)[0]; -// index++; -// } -// return type; -// } -// -// string typeOfVariable(Token symbol, size_t cursor) -// { -// // int is of type int, double of type double, and so on -// if (symbol.value in typeProperties) -// return symbol.value; -// -// string tokenType = getTypeFromToken(symbol); -// if (tokenType !is null) -// return tokenType; -// -// if (context.getMembersOfType(symbol.value)) -// return symbol.value; -// -// // Arbitrarily define the depth of the cursor position as zero -// // iterate backwards through the code to try to find the variable -// int depth = 0; -// auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); -// auto index = preceedingTokens.length - 1; -// while (true) -// { -// if (preceedingTokens[index] == TokenType.LBrace) -// --depth; -// else if (preceedingTokens[index] == TokenType.RBrace) -// ++depth; -// else if (depth <= 0 && preceedingTokens[index].value == symbol) -// { -// // Found the symbol, now determine if it was declared here. 
-// auto p = preceedingTokens[index - 1]; -// -// -// if ((p == TokenType.Auto || p == TokenType.Immutable -// || p == TokenType.Const) -// && preceedingTokens[index + 1] == TokenType.Assign) -// { -// // Try to determine the type of a variable declared as "auto" -// return getTypeOfExpression( -// tokens[index + 2 .. findEndOfExpression(tokens, index + 2)], -// tokens, cursor); -// } -// else if (p == TokenType.Identifier -// || (p.type > TokenType.TYPES_BEGIN -// && p.type < TokenType.TYPES_END)) -// { -// // Handle simple cases like "int a;" or "Someclass instance;" -// return p.value; -// } -// else if (p == TokenType.RBracket || p == TokenType.RParen) -// { -// return combineTokens(tokens[findBeginningOfExpression(tokens, index) .. index]); -// } -// } -// if (index == 0) -// break; -// else -// --index; -// } -// -// // Find all struct or class bodies that we're in. -// // Check for the symbol in those class/struct/interface bodies -// // if match is found, return it -// auto structs = context.getStructsContaining(cursor); -// if (symbol == "this" && structs.length > 0) -// { -// return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; -// } -// -// foreach (s; structs) -// { -// auto t = s.getMemberType(symbol.value); -// if (t !is null) -// return t; -// } -// return "void"; -// } -// -// string symbolAt(size_t cursor) const -// { -// auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; -// if (r.value.length + r.startIndex > cursor) -// return r.value; -// else -// return null; -// } -// -// string parenComplete(size_t cursor) -// { -// auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; -// Token t = tokens[index]; -// switch (tokens[index].type) -// { -// case TokenType.Version: -// return "completions\n" ~ to!string(join(map!`a ~ " k"`(versions), "\n").array()); -// case TokenType.Scope: -// return "completions\n" ~ to!string(join(map!`a ~ " k"`(scopes), "\n").array()); -// case TokenType.If: -// case TokenType.Cast: -// case TokenType.While: -// case TokenType.For: -// case TokenType.Foreach: -// case TokenType.Switch: -// return ""; -// default: -// size_t startIndex = findBeginningOfExpression(tokens, index); -// auto callChain = splitCallChain(tokens[startIndex .. index + 1]); -// auto expressionType = getTypeOfExpression( -// callChain[0 .. $ - 1], tokens, cursor); -// return "calltips\n" ~ to!string(context.getCallTipsFor(expressionType, -// callChain[$ - 1].value, cursor).join("\n").array()); -// } -// } -// -// string dotComplete(size_t cursor) -// { -// stderr.writeln("dotComplete"); -// auto index = assumeSorted(tokens).lowerBound(cursor).length - 1; -// Token t = tokens[index]; -// -// // If the last character entered before the cursor isn't a dot, give up. -// // The user was probably in the middle of typing the slice or vararg -// // operators -// if (t != TokenType.Dot) -// return null; -// -// size_t startIndex = findBeginningOfExpression(tokens, index); -// if (startIndex - 1 < tokens.length && tokens[startIndex - 1] == TokenType.Import) -// { -// return importComplete(splitCallChain(tokens[startIndex .. index])); -// } -// -// auto expressionType = getTypeOfExpression( -// splitCallChain(tokens[startIndex .. index]), tokens, cursor); -// -// stderr.writeln("expression type is ", expressionType); -// -// // Complete pointers and references the same way -// if (expressionType[$ - 1] == '*') -// expressionType = expressionType[0 .. 
$ - 1];
-//
-//		const Tuple!(string, string)[string] typeMap = context.getMembersOfType(
-//			expressionType);
-//		if (typeMap is null)
-//			return "";
-//		auto app = appender!(string[])();
-//		foreach (k, t; typeMap)
-//			app.put(k ~ " " ~ t[1]);
-//		return to!string(array(join(sort!("a.toLower() < b.toLower()")(app.data), "\n")));
-//	}
-//
-//	string importComplete(const(Token)[] tokens)
-//	{
-//		stderr.writeln("importComplete");
-//		auto app = appender!(string[])();
-//		string part = to!string(map!"a.value.dup"(tokens).join("/").array());
-//		foreach (path; context.importDirectories)
-//		{
-//			stderr.writeln("Searching for ", path, "/", part);
-//			if (!exists(buildPath(path, part)))
-//				continue;
-//			stderr.writeln("found it");
-//			foreach (DirEntry dirEntry; dirEntries(buildPath(path, part),
-//				SpanMode.shallow))
-//			{
-//				if (dirEntry.isDir)
-//					app.put(baseName(dirEntry.name) ~ " P");
-//				else if (dirEntry.name.endsWith(".d", ".di"))
-//					app.put(stripExtension(baseName(dirEntry.name)) ~ " M");
-//			}
-//		}
-//		return to!string(sort!("a.toLower() < b.toLower()")(app.data).join("\n").array());
-//	}
-//
-//	const(Token)[] tokens;
-//	CompletionContext context;
-//}
-//
-//unittest
-//{
-//	auto code = q{
-//struct TestStruct { int a; int b; }
-//TestStruct ts;
-//ts.a.
-//	};
-//
-//	auto tokens = tokenize(code);
-//	auto mod = parseModule(tokens);
-//	auto context = new CompletionContext(mod);
-//	auto completion = AutoComplete(tokens, context);
-//	assert (completion.getTypeOfExpression(splitCallChain(tokens[13 .. 16]),
-//		tokens, 56) == "int");
-//}
diff --git a/build.sh b/build.sh
index 6b6e9a3..eb2ae8a 100755
--- a/build.sh
+++ b/build.sh
@@ -1,3 +1,3 @@
 #dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
 #dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest
-ldc2 -O4 *.d std/d/*.d -of=dscanner -release
+ldc2 -O5 *.d std/d/*.d -of=dscanner -release -vectorize -m64
diff --git a/cache.d b/cache.d
deleted file mode 100644
index 6398422..0000000
--- a/cache.d
+++ /dev/null
@@ -1,217 +0,0 @@
-// Copyright Brian Schott (Sir Alaran) 2012.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-
-module cache;
-
-import etc.c.sqlite3;
-import std.c.stdlib;
-import std.datetime;
-import std.file;
-import std.uuid;
-import std.array;
-import std.string;
-import std.conv;
-import std.d.lexer;
-
-import location;
-import parser;
-import types;
-
-private sqlite3* database;
-
-version (Posix)
-{
-	private immutable char* DB_PATH = "~/.dscanner/cache.db";
-}
-else version (Windows)
-{
-	pragma(msg, "Caching not supported on Windows yet");
-	immutable string DB_PATH = "";
-}
-
-private enum Queries : string
-{
-	getUpdateTime = "select mtime from files where filepath = ?",
-	insertContainer = "insert into containers values ()",
-	deleteContainer = "delete from containers where fileId = ?",
-	deleteSymbol = "delete from symbols where containerId = ?",
-	deleteFile = "delete from files where path = ?",
-	getPublicImports = "select importedId from publicImports where importerId = ?",
-	getModuleId = "select id from files where path = ?",
-	getContainersByModule = "select id from containers where fileId = ?"
-} - -private sqlite3* getDatabase() -{ - if (database !is null) - return database; - int status = sqlite3_open(DB_PATH, &database); - if (status != SQLITE_OK) - { - throw new Exception("Could not open %s: %s".format(DB_PATH, - sqlite3_errmsg(database))); - } - return database; -} - -void closeDatabase() -{ - if (database !is null) - { - sqlite3_close(database); - database = null; - } -} - -private long getCachedModTime(sqlite3* db, sqlite3_stmt* statement, string filePath) -{ - bindText(statement, 1, filePath); - if (sqlite3_step(statement) != SQLITE_ROW) - throw new Exception("%s".format(sqlite3_errmsg(db))); - return sqlite3_column_int64(statement, 1); -} - -/** - * Updates the sqlite database with current autocomplete information for the - * given modules. - */ -void updateCache(string dirs[], string moduleNames[]) -{ - string[] filePaths; - foreach (moduleName; moduleNames) - { - string path = findAbsPath(dirs, moduleName); - if (path is null) - continue; - filePaths ~= path; - } - - sqlite3* db = getDatabase(); - sqlite3_stmt* statement; - scope(exit) { if (statement) sqlite3_finalize(statement); } - char* pzTail; - scope(exit) { if (pzTail) free(pzTail); } - sqlite3_prepare_v2(db, Queries.getUpdateTime.toStringz(), - cast(int) Queries.getUpdateTime.length + 1, &statement, &pzTail); - - foreach (string filePath; filePaths) - { - immutable long mtime = getCachedModTime(db, statement, filePath); - SysTime timeLastModified = timeLastModified(filePath); - // if the times match, we don't need to update the cache. - if (timeLastModified.stdTime == mtime) - continue; - -// // re-parse the module -// Module m = parseModule(byToken(readText(filePath)).array()); -// -// updateCache(m); - - sqlite3_reset(statement); - } -} - -private void updateCache(const Module m) -in -{ - assert(m !is null); -} -body -{ -} - -private string[] getImportedModules(string modulePath, sqlite3_stmt* statement = null) -{ - auto app = appender!(string[])(); - sqlite3* db = getDatabase(); - bool statementAllocated = false; - scope(exit) { if (statementAllocated && statement !is null) sqlite3_finalize(statement); } - if (statement is null) - { - statementAllocated = true; - char* pzTail; - scope(exit) { if (pzTail) free(pzTail); } - sqlite3_prepare_v2(db, Queries.getPublicImports.toStringz(), - cast(int) Queries.getPublicImports.length + 1, &statement, &pzTail); - } - - string moduleId = getModuleIdFromPath(modulePath); - bindText(statement, 1, moduleId); - while (sqlite3_step(statement) == SQLITE_ROW) - { - app.put(to!string(sqlite3_column_text(statement, 1))); - } - sqlite3_reset(statement); - foreach (string imported; app.data) - { - string[] r = getImportedModules(imported, statement); - } - return app.data; -} - -private string getModuleIdFromPath(string filePath) -{ - sqlite3* db = getDatabase(); - sqlite3_stmt* statement; - char* pzTail; - scope(exit) if (pzTail) free(pzTail); - sqlite3_prepare_v2(db, Queries.getModuleId.toStringz(), - cast(int) Queries.getModuleId.length + 1, &statement, - &pzTail); - bindText(statement, 1, filePath); - if (sqlite3_step(statement) != SQLITE_ROW) - return null; - return to!string(sqlite3_column_text(statement, 1)); -} - -/** - * Returns: the container IDs of the containers that have - * been imported - */ -public string[] getContainersImported(string modulePath) -{ - immutable string moduleId = getModuleIdFromPath(modulePath); - sqlite3* db = getDatabase(); - sqlite3_stmt* statement; - char* pzTail; - scope(exit) if (pzTail) free(pzTail); - string[] moduleIds = 
getImportedModules(modulePath); - string[] containerIds; - foreach (string id; moduleIds) - { - containerIds ~= getContainersByModule(id); - } - return containerIds; -} - -private string[] getContainersByModule(string moduleId) -{ - sqlite3* db = getDatabase(); - sqlite3_stmt* statement; - scope(exit) if (statement !is null) sqlite3_finalize(statement); - char* pzTail; - prepareStatement(db, statement, Queries.getContainersByModule); - bindText(statement, 1, moduleId); - string[] rVal; - while (sqlite3_step(statement) == SQLITE_ROW) - { - rVal ~= to!string(sqlite3_column_text(statement, 1)); - } - return rVal; -} - -private void prepareStatement(sqlite3* db, sqlite3_stmt* statement, string query) -{ - char* pzTail; - scope(exit) if (pzTail) free(pzTail); - sqlite3_prepare_v2(db, query.toStringz(), cast(int) query.length + 1, - &statement, &pzTail); -} - -private void bindText(sqlite3_stmt* statement, int argPos, string text) -{ - sqlite3_bind_text(statement, argPos, text.toStringz(), - cast(int) text.length + 1, SQLITE_TRANSIENT); -} diff --git a/parser.d b/parser.d deleted file mode 100644 index ab6ae12..0000000 --- a/parser.d +++ /dev/null @@ -1,975 +0,0 @@ - -// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module parser; - -import std.stream; -import std.array; -import std.stdio; -import std.algorithm; -import std.range; -import std.d.lexer; - -import types; -import langutils; -import circularbuffer; - -alias CircularBuffer!Token TokenBuffer; - -class Balanced : TokenBuffer -{ -public: - - this(InputRange!Token tokens, TokenType open, TokenType close) - { - super(0, tokens); - this.range = tokens; - this.open = open; - this.close = close; - } - - override bool empty() @property - { - return _empty; - } - - override Token front() @property - { - return range.front; - } - - override void popFront() - { - range.popFront(); - if (range.front == open) - ++depth; - else if (range.front == close) - --depth; - _empty = depth == 0 || range.empty; - } - - invariant() - { - assert (range); - assert (depth >= 0); - } - -private: - int depth; - TokenType open; - TokenType close; - InputRange!(Token) range; - bool _empty; -} - -/** - * Params: - * tokens = the array of tokens - * index = an index into tokens such that tokens.front.type == open - * open = the opening delimiter - * close = the closing delimiter - * Returns: all tokens that are between the balanced delimiters that start at - * tokens.front, not including the delimiters. If the delimiters in tokens - * are not balanced, this function will return tokens[index + 1 .. 
$]; - */ -Balanced betweenBalanced(TokenBuffer tokens, - TokenType open, TokenType close) -in -{ - assert (tokens.front == open); -} -body -{ - return new Balanced(tokens, open, close); -} - - -/** - * See_also: betweenBalanced - */ -Balanced betweenBalancedBraces(TokenBuffer tokens) -{ - return betweenBalanced(tokens, TokenType.lBrace, TokenType.rBrace); -} - - -/** - * See_also: betweenBalanced - */ -Balanced betweenBalancedParens(TokenBuffer tokens) -{ - return betweenBalanced(tokens, TokenType.lParen, TokenType.rParen); -} - - -/** - * See_also: betweenBalanced - */ -Balanced betweenBalancedBrackets(TokenBuffer tokens) -{ - return betweenBalanced(tokens, TokenType.lBracket, TokenType.rBracket); -} - -void skipBalanced(alias openToken, alias closeToken)(TokenBuffer tokens) -in -{ - assert (tokens.front == openToken); -} -body -{ - int depth = 1; - tokens.popFront(); - while (!tokens.empty && depth != 0) - { - switch (tokens.front.type) - { - case openToken: ++depth; break; - case closeToken: --depth; break; - default: break; - } - tokens.popFront(); - } -} - -void skipParens(TokenBuffer tokens) -{ - skipBalanced!(TokenType.lParen, TokenType.rParen)(tokens); -} - -void skipBrackets(TokenBuffer tokens) -{ - skipBalanced!(TokenType.lBracket, TokenType.rBracket)(tokens); -} - -void skipBraces(TokenBuffer tokens) -{ - skipBalanced!(TokenType.lBrace, TokenType.rBrace)(tokens); -} - -/** - * Params: - * tokens = the token array to examine - * index = an indext into tokens such that tokens.front.type == open - * open = the opening delimiter - * close = the closing delimiter - * Returns: a string representing the contents of the two delimiters. This will - * not preserve whitespace, but it will place a single space character after - * a comma and between identifiers. - */ -string content(TokenBuffer tokens, TokenType open, TokenType close) -in -{ - assert (tokens.front == open); -} -body -{ - auto app = appender!string(); - int depth = 1; - foreach (t; betweenBalanced(tokens, open, close)) - { - if (t == TokenType.comma) - app.put(", "); - else - app.put(t.value); - } - return app.data; -} - - -/** - * See_also: content - */ -string parenContent(TokenBuffer tokens) -{ - return "(" ~ content(tokens, TokenType.lParen, TokenType.rParen) ~ ")"; -} - - -/** - * See_also: content - */ -string bracketContent(TokenBuffer tokens) -{ - return "[" ~ content(tokens, TokenType.lBracket, TokenType.rBracket) ~ "]"; -} - - -/** - * Advances index until it indexes a character in tokens after a right brace if - * index initially indexed a right brace, or advances index until it indexes a - * character after a simicolon otherwise. - */ -void skipBlockStatement(TokenBuffer tokens) -{ - if (tokens.front == TokenType.lBrace) - skipBraces(tokens); - else - skipPastNext(tokens, TokenType.semicolon); -} - - -/** - * Advances index until it indexes a character in tokens directly after a token - * of type type. 
This function handles nesting of braces, brackets, and - * parenthesis - */ -void skipPastNext(TokenBuffer tokens, TokenType type) -{ - while (!tokens.empty) - { - if (tokens.front.type == TokenType.lBrace) - skipBraces(tokens); - else if (tokens.front.type == TokenType.lParen) - skipParens(tokens); - else if (tokens.front.type == TokenType.lBracket) - skipBrackets(tokens); - else if (tokens.front.type == type) - { - tokens.popFront(); - return; - } - else - tokens.popFront(); - } -} - -string parseTypeDeclaration(TokenBuffer tokens) -{ - auto type = tokens.front.value; - tokens.popFront(); - buildingType: while (!tokens.empty) - { - switch (tokens.front.type) - { - case TokenType.lBracket: - type ~= bracketContent(tokens); - break; - case TokenType.not: - type ~= tokens.front.value; - tokens.popFront(); - if (tokens.front == TokenType.lParen) - type ~= parenContent(tokens); - else - { - type ~= tokens.front.value; - tokens.popFront(); - } - break; - case TokenType.star: - case TokenType.bitAnd: - type ~= tokens.front.value; - tokens.popFront(); - break; - case TokenType.function_: - type ~= " " ~ tokens.front.value; - tokens.popFront(); - type ~= parenContent(tokens); - break; - default: - break buildingType; - } - } - return type; -} - -/** - * Parses a module from a token array. - * Params: - * protection = the default protection level for a block statement - * attributes = the default attributes for a block statement - * Returns: the parsed module - */ -Module parseModule(TokenBuffer tokens, string protection = "public", string[] attributes = []) -{ - string type; - string name; - string localProtection = ""; - string[] localAttributes = []; - - void resetLocals() - { - type = ""; - name = ""; - localProtection = ""; - localAttributes = []; - } - - Module mod = new Module; - size_t index = 0; - while(!tokens.empty) - { - switch(tokens.front.type) - { - case TokenType.pragma_: - tokens.popFront(); - if (tokens.front == TokenType.lParen) - skipParens(tokens); - break; - case TokenType.mixin_: - case TokenType.assert_: - tokens.popFront(); - tokens.skipBlockStatement(); - break; - case TokenType.alias_: - Alias a = parseAlias(tokens, - localProtection.empty() ? protection : localProtection, - attributes); - mod.aliases ~= a; - break; - case TokenType.import_: - mod.imports ~= parseImports(tokens); - resetLocals(); - break; - case TokenType.version_: - tokens.popFront(); - if (tokens.front == TokenType.lParen) - { - tokens.betweenBalancedParens(); - if (tokens.front == TokenType.lBrace) - { - auto braceContent = tokens.betweenBalancedBraces(); - mod.merge(parseModule(braceContent, - localProtection.empty() ? 
protection : localProtection, - attributes)); - } - } - else if (tokens.front == TokenType.assign) - tokens.skipBlockStatement(); - break; - case TokenType.deprecated_: - case TokenType.nothrow_: - case TokenType.override_: - case TokenType.synchronized_: - case TokenType.abstract_: - case TokenType.final_: - case TokenType.gshared: - case TokenType.static_: - localAttributes ~= tokens.front.value; - tokens.popFront(); - break; - case TokenType.const_: - case TokenType.immutable_: - case TokenType.inout_: - case TokenType.pure_: - case TokenType.scope_: - case TokenType.shared_: - auto tmp = tokens.front.value; - tokens.popFront(); - if (tokens.front == TokenType.lParen) - type = tmp ~ tokens.parenContent(); - else if (tokens.front == TokenType.colon) - { - index++; - attributes ~= tmp; - } - localAttributes ~= tmp; - break; - case TokenType.align_: - case TokenType.extern_: - string attribute = tokens.front.value; - tokens.popFront(); - if (tokens.front == TokenType.lParen) - attribute ~= parenContent(tokens); - if (tokens.front == TokenType.lBrace) - mod.merge(parseModule(tokens.betweenBalancedBraces(), - localProtection.empty() ? protection : localProtection, - attributes ~ attribute)); - else if (tokens.front == TokenType.colon) - { - tokens.popFront(); - attributes ~= attribute; - } - else - localAttributes ~= attribute; - break; - case TokenType.export_: .. case TokenType.public_: - string p = tokens.front.value; - tokens.popFront(); - if (tokens.front == TokenType.colon) - { - protection = p; - tokens.popFront(); - } - else if (tokens.front == TokenType.lBrace) - mod.merge(parseModule(tokens.betweenBalancedBraces(), - p, attributes ~ localAttributes)); - else - localProtection = p; - break; - case TokenType.module_: - tokens.popFront(); - while (!tokens.empty && tokens.front != TokenType.semicolon) - { - mod.name ~= tokens.front.value; - tokens.popFront(); - } - tokens.popFront(); - resetLocals(); - break; - case TokenType.union_: - mod.unions ~= parseUnion(tokens, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - resetLocals(); - break; - case TokenType.class_: - mod.classes ~= parseClass(tokens, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - resetLocals(); - break; - case TokenType.interface_: - mod.interfaces ~= parseInterface(tokens, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - resetLocals(); - break; - case TokenType.struct_: - mod.structs ~= parseStruct(tokens, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - resetLocals(); - break; - case TokenType.enum_: - mod.enums ~= parseEnum(tokens, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - resetLocals(); - break; - case TokenType.template_: - tokens.popFront(); // template - tokens.popFront(); // name - if (tokens.front == TokenType.lParen) - tokens.betweenBalancedParens(); // params - if (tokens.front == TokenType.lBrace) - tokens.betweenBalancedBraces(); // body - resetLocals(); - break; - case TokenType.bool_: .. case TokenType.wstring_: - case TokenType.auto_: - case TokenType.identifier: - if (type.empty()) - { - type = tokens.parseTypeDeclaration(); - } - else - { - name = tokens.front.value; - tokens.popFront(); - if (tokens.empty) break; - if (tokens.front == TokenType.lParen) - { - mod.functions ~= parseFunction(tokens, type, name, - tokens.front.line, - localProtection.empty() ? 
protection : localProtection, - attributes ~ localAttributes); - } - else - { - Variable v = new Variable; - v.name = name; - v.type = type; - v.attributes = localAttributes ~ attributes; - v.protection = localProtection.empty() ? protection : localProtection; - v.line = tokens.front.line; - mod.variables ~= v; - } - resetLocals(); - } - break; - case TokenType.unittest_: - tokens.popFront(); - if (!tokens.empty() && tokens.front == TokenType.lBrace) - tokens.skipBlockStatement(); - resetLocals(); - break; - case TokenType.tilde: - tokens.popFront(); - if (tokens.front == TokenType.this_) - { - name = "~"; - goto case; - } - break; - case TokenType.this_: - name ~= tokens.front.value; - tokens.popFront(); - if (!tokens.empty && tokens.front == TokenType.lParen) - { - mod.functions ~= parseFunction(tokens, "", name, - tokens.peek(-1).line, - localProtection.empty() ? protection : localProtection, - localAttributes ~ attributes); - } - resetLocals(); - break; - default: - tokens.popFront(); - break; - } - } - return mod; -} - - -/** - * Parses an import statement - * Returns: only the module names that were imported, not which symbols were - * selectively improted. - */ -string[] parseImports(TokenBuffer tokens) -{ - assert(tokens.front == TokenType.import_); - tokens.popFront(); - auto app = appender!(string[])(); - string im; - while (!tokens.empty) - { - switch(tokens.front.type) - { - case TokenType.comma: - tokens.popFront(); - app.put(im); - im = ""; - break; - case TokenType.assign: - case TokenType.semicolon: - app.put(im); - tokens.popFront(); - return app.data; - case TokenType.colon: - app.put(im); - tokens.skipBlockStatement(); - return app.data; - default: - im ~= tokens.front.value; - tokens.popFront(); - break; - } - } - return app.data; -} - - -/** - * Parses an enum declaration - */ -Enum parseEnum(TokenBuffer tokens, string protection, string[] attributes) -in -{ - assert (tokens.front == TokenType.enum_); -} -body -{ - Enum e = new Enum; - e.line = tokens.front.line; - tokens.popFront(); - string enumType; - e.protection = protection; - - if (tokens.front == TokenType.lBrace) - goto enumBody; - - if (isIdentifierOrType(tokens.front.type)) - { - if (tokens.canPeek() && tokens.peek() == TokenType.identifier) - { - // enum long l = 4; - EnumMember m; - m.type = tokens.front.value; - tokens.popFront(); - m.line = tokens.front.line; - e.name = m.name = tokens.front.value; - e.members ~= m; - tokens.skipBlockStatement(); - return e; - } - else if (tokens.canPeek() && tokens.peek() == TokenType.assign) - { - // enum m = "abcd"; - e.name = tokens.front.value; - EnumMember m; - m.name = e.name; - m.line = tokens.front.line; - m.type = getTypeFromToken(tokens.peek(2)); - e.members ~= m; - tokens.skipBlockStatement(); - return e; - } - } - - if (isIdentifierOrType(tokens.front.type)) - { - e.name = tokens.front.value; - tokens.popFront(); - } - - if (tokens.front == TokenType.colon) - { - tokens.popFront(); - if (!isIdentifierOrType(tokens.front.type)) - tokens.skipBlockStatement(); - else - { - enumType = tokens.front.value; - tokens.popFront(); - } - } - -enumBody: -// -// auto r = tokens.betweenBalancedBraces(); -// while (!r.empty) -// { -// EnumMember m; -// if (isIdentifierOrType(r.front) && i + 1 < r.length && isIdentifierOrType(r[i + 1])) -// { -// m.line = r[i + 1].line; -// m.name = r[i + 1].value; -// m.type = r.front.value; -// } -// else if (isIdentifierOrType(r.front) && i + 1 < r.length && r[i + 1] == TokenType.Assign) -// { -// if (enumType == null && i + 2 < 
r.length) -// m.type = getTypeFromToken(r[i + 2]); -// else -// m.type = enumType; -// m.line = r.front.line; -// m.name = r.front.value; -// } -// else -// { -// m.line = r.front.line; -// m.name = r.front.value; -// m.type = enumType == null ? "int" : enumType; -// } -// e.members ~= m; -// skipPastNext(r, TokenType.comma, i); -// } - return e; -} - - -/** - * Parses a function declaration - */ -Function parseFunction(TokenBuffer tokens, string type, - string name, uint line, string protection, string[] attributes) -in -{ - assert (tokens.front == TokenType.lParen); -} -body -{ - Function f = new Function; - f.name = name; - f.returnType = type; - f.line = line; - f.attributes.insertInPlace(f.attributes.length, attributes); - - Variable[] vars1 = parseParameters(tokens); - if (!tokens.empty && tokens.front == TokenType.lParen) - { - f.templateParameters.insertInPlace(f.templateParameters.length, - map!("a.type")(vars1)); - f.parameters.insertInPlace(f.parameters.length, - parseParameters(tokens)); - } - else - f.parameters.insertInPlace(f.parameters.length, vars1); - - attributeLoop: while(!tokens.empty) - { - switch (tokens.front.type) - { - case TokenType.immutable_: - case TokenType.const_: - case TokenType.pure_: - case TokenType.nothrow_: - case TokenType.final_: - case TokenType.override_: - f.attributes ~= tokens.front.value; - tokens.popFront(); - break; - default: - break attributeLoop; - } - } - - if (!tokens.empty && tokens.front == TokenType.if_) - f.constraint = parseConstraint(tokens); - - while (!tokens.empty && - (tokens.front == TokenType.in_ || tokens.front == TokenType.out_ - || tokens.front == TokenType.body_)) - { - tokens.popFront(); - if (!tokens.empty && tokens.front == TokenType.lParen - && tokens.peek(-1) == TokenType.out_) - { - tokens.skipParens(); - } - - if (!tokens.empty && tokens.front == TokenType.lBrace) - tokens.skipBlockStatement(); - } - if (!tokens.empty) - return f; - if (tokens.front == TokenType.lBrace) - tokens.skipBlockStatement(); - else if (tokens.front == TokenType.semicolon) - tokens.popFront(); - return f; -} - -string parseConstraint(TokenBuffer tokens) -{ - auto appender = appender!(string)(); - assert(tokens.front == TokenType.if_); - appender.put(tokens.front.value); - tokens.popFront(); - assert(tokens.front == TokenType.lParen); - return "if " ~ tokens.parenContent(); -} - -Variable[] parseParameters(TokenBuffer tokens) -in -{ - assert (tokens.front == TokenType.lParen); -} -body -{ - auto appender = appender!(Variable[])(); - Variable v = new Variable; - auto r = betweenBalancedParens(tokens); - size_t i = 0; - while (!r.empty) - { - switch(r.front.type) - { - case TokenType.alias_: - case TokenType.in_: - case TokenType.out_: - case TokenType.ref_: - case TokenType.scope_: - case TokenType.lazy_: - case TokenType.const_: - case TokenType.immutable_: - case TokenType.shared_: - case TokenType.inout_: - auto tmp = r.front.value; - r.popFront(); - if (r.front == TokenType.lParen) - v.type ~= tmp ~ parenContent(r); - else - v.attributes ~= tmp; - break; - case TokenType.colon: - i++; - r.skipPastNext(TokenType.comma); - appender.put(v); - v = new Variable; - break; - case TokenType.comma: - ++i; - appender.put(v); - v = new Variable; - break; - default: - if (v.type.empty()) - { - v.type = r.parseTypeDeclaration(); - if (!r.empty) - appender.put(v); - } - else - { - v.line = r.front.line; - v.name = r.front.value; - r.popFront(); - appender.put(v); - if (!r.empty && r.front == TokenType.vararg) - { - v.type ~= " ..."; - } - v = new 
Variable; - r.skipPastNext(TokenType.comma); - } - break; - } - } - return appender.data; -} - -string[] parseBaseClassList(TokenBuffer tokens) -in -{ - assert(tokens.front == TokenType.colon); -} -body -{ - auto appender = appender!(string[])(); - tokens.popFront(); - while (!tokens.empty) - { - if (tokens.front == TokenType.identifier) - { - string base = parseTypeDeclaration(tokens); - appender.put(base); - if (tokens.front == TokenType.comma) - tokens.popFront(); - else - break; - } - else - break; - } - return appender.data; -} - -void parseStructBody(TokenBuffer tokens, Struct st) -{ - st.bodyStart = tokens.front.startIndex; - Module m = parseModule(tokens.betweenBalancedBraces()); - st.bodyEnd = tokens.peek(-1).startIndex; - st.functions.insertInPlace(0, m.functions); - st.variables.insertInPlace(0, m.variables); - st.aliases.insertInPlace(0, m.aliases); -} - - -Struct parseStructOrUnion(TokenBuffer tokens, string protection, - string[] attributes) -{ - Struct s = new Struct; - s.line = tokens.front.line; - s.attributes = attributes; - s.protection = protection; - s.name = tokens.front.value; - tokens.popFront(); - if (tokens.front == TokenType.lParen) - s.templateParameters.insertInPlace(s.templateParameters.length, - map!("a.type")(parseParameters(tokens))); - - if (tokens.empty) return s; - - if (tokens.front == TokenType.if_) - s.constraint = parseConstraint(tokens); - - if (tokens.empty) return s; - - if (tokens.front == TokenType.lBrace) - parseStructBody(tokens, s); - else - tokens.skipBlockStatement(); - return s; -} - -Struct parseStruct(TokenBuffer tokens, string protection, - string[] attributes) -in -{ - assert(tokens.front == TokenType.struct_); -} -body -{ - return parseStructOrUnion(tokens, protection, attributes); -} - -Struct parseUnion(TokenBuffer tokens, string protection, string[] attributes) -in -{ - assert(tokens.front == TokenType.union_); -} -body -{ - tokens.popFront(); - return parseStructOrUnion(tokens, protection, attributes); -} - -Inherits parseInherits(TokenBuffer tokens, string protection, string[] attributes) -{ - auto i = new Inherits; - i.line = tokens.front.line; - i.name = tokens.front.value; - tokens.popFront(); - i.protection = protection; - i.attributes.insertInPlace(i.attributes.length, attributes); - if (tokens.front == TokenType.lParen) - i.templateParameters.insertInPlace(i.templateParameters.length, - map!("a.type")(parseParameters(tokens))); - - if (tokens.empty) return i; - - if (tokens.front == TokenType.if_) - i.constraint = parseConstraint(tokens); - - if (tokens.empty) return i; - - if (tokens.front == TokenType.colon) - i.baseClasses = parseBaseClassList(tokens); - - if (tokens.empty) return i; - - if (tokens.front == TokenType.lBrace) - parseStructBody(tokens, i); - else - tokens.skipBlockStatement(); - return i; -} - -Inherits parseInterface(TokenBuffer tokens, string protection, string[] attributes) -in -{ - assert (tokens.front == TokenType.interface_); -} -body -{ - tokens.popFront(); - return parseInherits(tokens, protection, attributes); -} - - -Inherits parseClass(TokenBuffer tokens, string protection, string[] attributes) -in -{ - assert(tokens.front == TokenType.class_); -} -body -{ - tokens.popFront(); - return parseInherits(tokens, protection, attributes); -} - - -/** - * Parse an alias declaration. - * Note that the language spec mentions a "AliasInitializerList" in the grammar, - * but there seems to be no example of this being used, nor has the compiler - * accepted any of my attempts to create one. 
Therefore, it's not supported here
- */
-Alias parseAlias(TokenBuffer tokens, string protection, string[] attributes)
-in
-{
-	assert(tokens.front == TokenType.alias_);
-}
-body
-{
-	tokens.popFront();
-	Alias a = new Alias;
-	a.aliasedType = parseTypeDeclaration(tokens);
-	a.attributes = attributes;
-	a.protection = protection;
-	if (tokens.front == TokenType.identifier)
-	{
-		a.name = tokens.front.value;
-		a.line = tokens.front.line;
-		skipBlockStatement(tokens);
-	}
-	else
-		return null;
-	return a;
-}
diff --git a/std/d/lexer.d b/std/d/lexer.d
index 81c10c3..b8741dc 100644
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@@ -412,8 +412,7 @@ private:
 	this(ref R range)
 	{
 		this.range = range;
-		buffer = uninitializedArray!(ubyte[])(config.bufferSize);
-		cache.initialize();
+		buffer = uninitializedArray!(ubyte[])(bufferSize);
 	}
 
 	/*
@@ -512,7 +511,7 @@ private:
 				current.type = TokenType.div;
 				current.value = "/";
 				advanceRange();
-				break;
+				return;
 			}
 			switch (r.front)
 			{
@@ -544,12 +543,12 @@ private:
 				current.type = TokenType.dot;
 				current.value = getTokenValue(TokenType.dot);
 				advanceRange();
-				break outer;
+				return;
 			}
 			else if (r.front >= '0' && r.front <= '9')
 			{
 				lexNumber();
-				break outer;
+				return;
 			}
 			else if (r.front == '.')
 			{
@@ -568,22 +567,23 @@ private:
 					advanceRange();
 				}
 				current.value = getTokenValue(current.type);
+				return;
 			}
 			else
 			{
 				advanceRange();
 				current.type = TokenType.dot;
 				current.value = getTokenValue(TokenType.dot);
+				return;
 			}
-			break;
 		case '0': .. case '9':
 			lexNumber();
-			break;
+			return;
 		case '\'':
 		case '"':
 		case '`':
 			lexString();
-			break;
+			return;
 		case 'q':
 			static if (isArray!R)
 				auto r = range[index .. $];
@@ -593,12 +593,12 @@ private:
 			if (!r.isRangeEoF() && r.front == '{')
 			{
 				lexTokenString();
-				break;
+				return;
 			}
 			else if (!r.isRangeEoF() && r.front == '"')
 			{
 				lexDelimitedString();
-				break;
+				return;
 			}
 			else
 				goto default;
@@ -611,7 +611,7 @@ private:
 			if (!r.isRangeEoF() && r.front == '"')
 			{
 				lexString();
-				break;
+				return;
 			}
 			else
 				goto default;
@@ -624,13 +624,13 @@ private:
 			if (!r.isRangeEoF() && r.front == '"')
 			{
 				lexHexString();
-				break;
+				return;
 			}
 			else
 				goto default;
 		case '#':
 			lexSpecialTokenSequence();
-			break;
+			return;
 		default:
 			while(!isEoF() && !isSeparating(currentElement()))
 			{
@@ -649,7 +649,7 @@ private:
 			}
 
 			if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial))
-				break;
+				return;
 
 			switch (current.type)
 			{
@@ -657,12 +657,12 @@ private:
 				current.type = TokenType.stringLiteral;
 				auto time = Clock.currTime();
 				current.value = format("%s %02d %04d", time.month, time.day, time.year);
-				break;
+				return;
 			case TokenType.time:
 				auto time = Clock.currTime();
 				current.type = TokenType.stringLiteral;
 				current.value = (cast(TimeOfDay)(time)).toISOExtString();
-				break;
+				return;
 			case TokenType.timestamp:
 				auto time = Clock.currTime();
 				auto dt = cast(DateTime) time;
@@ -670,27 +670,26 @@ private:
 				current.value = format("%s %s %02d %02d:%02d:%02d %04d",
 					dt.dayOfWeek, dt.month, dt.day, dt.hour,
 					dt.minute, dt.second, dt.year);
-				break;
+				return;
 			case TokenType.vendor:
 				current.type = TokenType.stringLiteral;
 				current.value = config.vendorString;
-				break;
+				return;
 			case TokenType.compilerVersion:
 				current.type = TokenType.stringLiteral;
 				current.value = format("%d", config.versionNumber);
-				break;
+				return;
 			case TokenType.line:
 				current.type = TokenType.intLiteral;
 				current.value = format("%d", current.line);
-				break;
+				return;
 			case TokenType.file:
 				current.type = TokenType.stringLiteral;
 				current.value = config.fileName;
-				break;
+				return;
 			default:
-				break;
+				return;
 			}
-			break;
 		}
 	}
 
@@ -702,7 +701,7 @@ private:
keepChar(); } if (config.iterStyle & IterationStyle.includeWhitespace) - setTokenValue(); + setTokenValue(); } void lexComment() @@ -768,7 +767,7 @@ private: assert(false); } if (config.iterStyle & IterationStyle.includeComments) - setTokenValue(); + setTokenValue(); } void lexHexString() @@ -779,8 +778,8 @@ private: body { current.type = TokenType.stringLiteral; - keepChar(); - keepChar(); + keepChar(); + keepChar(); while (true) { if (isEoF()) @@ -805,17 +804,17 @@ private: { errorMessage(format("Invalid character '%s' in hex string literal", cast(char) currentElement())); - return; + return; } } lexStringSuffix(); if (config.tokenStyle & TokenStyle.notEscaped) - { - if (config.tokenStyle & TokenStyle.includeQuotes) - setTokenValue(); - else - setTokenValue(bufferIndex - 1, 2); - } + { + if (config.tokenStyle & TokenStyle.includeQuotes) + setTokenValue(); + else + setTokenValue(bufferIndex - 1, 2); + } else { auto a = appender!(ubyte[])(); @@ -1154,9 +1153,9 @@ private: else { if (buffer[0] == 'r') - setTokenValue(bufferIndex - 1, 2); + setTokenValue(bufferIndex - 1, 2); else - setTokenValue(bufferIndex - 1, 1); + setTokenValue(bufferIndex - 1, 1); } } @@ -1245,7 +1244,7 @@ private: if (config.tokenStyle & TokenStyle.includeQuotes) setTokenValue(); else - setTokenValue(bufferIndex - 2, 3); + setTokenValue(bufferIndex - 2, 3); } while (true) { @@ -1328,7 +1327,7 @@ private: size_t e = bufferIndex; if (buffer[e - 1] == 'c' || buffer[e - 1] == 'd' || buffer[e - 1] == 'w') --e; - setTokenValue(e, b); + setTokenValue(e, b); } } @@ -1468,8 +1467,8 @@ private: void keepNonNewlineChar() { - if (bufferIndex + 2 >= buffer.length) - buffer.length += (1024 * 4); + if (bufferIndex >= buffer.length) + buffer.length += 1024; static if (isArray!R) buffer[bufferIndex++] = range[index++]; else @@ -1483,7 +1482,7 @@ private: void keepChar() { if (bufferIndex + 2 >= buffer.length) - buffer.length += (1024 * 4); + buffer.length += 1024; bool foundNewline; if (currentElement() == '\r') { @@ -1531,7 +1530,7 @@ private: } } - ElementType!R currentElement() + ElementType!R currentElement() const { assert (index < range.length, "%d, %d".format(index, range.length)); static if (isArray!R) @@ -1547,25 +1546,24 @@ private: ++index; } - void setTokenValue(size_t endIndex = 0, size_t startIndex = 0) - { - if (endIndex == 0) - endIndex = bufferIndex; - current.value = cache.get(buffer[startIndex .. endIndex]); - } + void setTokenValue(size_t endIndex = 0, size_t startIndex = 0) + { + if (endIndex == 0) + endIndex = bufferIndex; + current.value = cache.get(buffer[startIndex .. endIndex]); + } - bool isEoF() + bool isEoF() const { static if (isArray!R) { -// import std.stdio; -// stderr.writefln("%d %d", index, range.length); return index >= range.length || range[index] == 0 || range[index] == 0x1a; } else return range.empty || range.front == 0 || range.front == 0x1a; } + immutable bufferSize = 1024 * 8; Token current; uint lineNumber; size_t index; @@ -1575,7 +1573,7 @@ private: ubyte[] buffer; size_t bufferIndex; LexerConfig config; - StringCache cache; + StringCache cache; } /** @@ -1878,201 +1876,201 @@ pure nothrow bool isRangeEoF(R)(ref R range) * generated. 
*/ immutable(string[TokenType.max + 1]) tokenValues = [ - "=", - "@", - "&", - "&=", - "|", - "|=", - "~=", - ":", - ",", - "--", - "/", - "/=", - "$", - ".", - "==", - "=>", - ">", - ">=", - "#", - "++", - "{", - "[", - "<", - "<=", - "<>=", - "<>", - "&&", - "||", - "(", - "-", - "-=", - "%", - "%=", - "*=", - "!", - "!=", - "!>", - "!>=", - "!<", - "!<=", - "!<>", - "+", - "+=", - "^^", - "^^=", - "}", - "]", - ")", - ";", - "<<", - "<<=", - ">>", - ">>=", - "..", - "*", - "?", - "~", - "!<>=", - ">>>", - ">>>=", - "...", - "^", - "^=", - "bool", - "byte", - "cdouble", - "cent", - "cfloat", - "char", - "creal", - "dchar", - "double", - "float", - "function", - "idouble", - "ifloat", - "int", - "ireal", - "long", - "real", - "short", - "ubyte", - "ucent", - "uint", - "ulong", - "ushort", - "void", - "wchar", - "align", - "deprecated", - "extern", - "pragma", - "export", - "package", - "private", - "protected", - "public", - "abstract", - "auto", - "const", - "final", - "__gshared", - "immutable", - "inout", - "scope", - "shared", - "static", - "synchronized", - "alias", - "asm", - "assert", - "body", - "break", - "case", - "cast", - "catch", - "class", - "continue", - "debug", - "default", - "delegate", - "delete", - "do", - "else", - "enum", - "false", - "finally", - "foreach", - "foreach_reverse", - "for", - "goto", - "if", - "import", - "in", - "interface", - "invariant", - "is", - "lazy", - "macro", - "mixin", - "module", - "new", - "nothrow", - "null", - "out", - "override", - "pure", - "ref", - "return", - "struct", - "super", - "switch", - "template", - "this", - "throw", - "true", - "try", - "typedef", - "typeid", - "typeof", - "union", - "unittest", - "version", - "volatile", - "while", - "with", - "__DATE__", - "__EOF__", - "__TIME__", - "__TIMESTAMP__", - "__VENDOR__", - "__VERSION__", - "__FILE__", - "__LINE__", - null, - null, - null, - "__traits", - "__parameters", - "__vector", - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, + "=", + "@", + "&", + "&=", + "|", + "|=", + "~=", + ":", + ",", + "--", + "/", + "/=", + "$", + ".", + "==", + "=>", + ">", + ">=", + "#", + "++", + "{", + "[", + "<", + "<=", + "<>=", + "<>", + "&&", + "||", + "(", + "-", + "-=", + "%", + "%=", + "*=", + "!", + "!=", + "!>", + "!>=", + "!<", + "!<=", + "!<>", + "+", + "+=", + "^^", + "^^=", + "}", + "]", + ")", + ";", + "<<", + "<<=", + ">>", + ">>=", + "..", + "*", + "?", + "~", + "!<>=", + ">>>", + ">>>=", + "...", + "^", + "^=", + "bool", + "byte", + "cdouble", + "cent", + "cfloat", + "char", + "creal", + "dchar", + "double", + "float", + "function", + "idouble", + "ifloat", + "int", + "ireal", + "long", + "real", + "short", + "ubyte", + "ucent", + "uint", + "ulong", + "ushort", + "void", + "wchar", + "align", + "deprecated", + "extern", + "pragma", + "export", + "package", + "private", + "protected", + "public", + "abstract", + "auto", + "const", + "final", + "__gshared", + "immutable", + "inout", + "scope", + "shared", + "static", + "synchronized", + "alias", + "asm", + "assert", + "body", + "break", + "case", + "cast", + "catch", + "class", + "continue", + "debug", + "default", + "delegate", + "delete", + "do", + "else", + "enum", + "false", + "finally", + "foreach", + "foreach_reverse", + "for", + "goto", + "if", + "import", + "in", + "interface", + "invariant", + "is", + "lazy", + "macro", + "mixin", + "module", + "new", + "nothrow", + "null", + "out", + "override", + "pure", + "ref", + "return", + "struct", + "super", + 
"switch", + "template", + "this", + "throw", + "true", + "try", + "typedef", + "typeid", + "typeof", + "union", + "unittest", + "version", + "volatile", + "while", + "with", + "__DATE__", + "__EOF__", + "__TIME__", + "__TIMESTAMP__", + "__VENDOR__", + "__VERSION__", + "__FILE__", + "__LINE__", + null, + null, + null, + "__traits", + "__parameters", + "__vector", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, ]; pure string getTokenValue(const TokenType type) @@ -2100,164 +2098,175 @@ pure nothrow TokenType lookupTokenType(const const(char)[] input) switch(input.length) { case 2: - switch (input) + switch (input[0]) { - case "do": return TokenType.do_; - case "if": return TokenType.if_; - case "in": return TokenType.in_; - case "is": return TokenType.is_; + case 'd': if (input == "do") return TokenType.do_; else break; + case 'i': + if (input == "if") return TokenType.if_; + else if (input == "in") return TokenType.in_; + else if (input == "is") return TokenType.is_; + else break; default: break; } break; case 3: - switch (input) + switch (input[0]) { - case "asm": return TokenType.asm_; - case "for": return TokenType.for_; - case "int": return TokenType.int_; - case "new": return TokenType.new_; - case "out": return TokenType.out_; - case "ref": return TokenType.ref_; - case "try": return TokenType.try_; + case 'a': if (input == "asm") return TokenType.asm_; else break; + case 'f': if (input == "for") return TokenType.for_; else break; + case 'i': if (input == "int") return TokenType.int_; else break; + case 'n': if (input == "new") return TokenType.new_; else break; + case 'o': if (input == "out") return TokenType.out_; else break; + case 'r': if (input == "ref") return TokenType.ref_; else break; + case 't': if (input == "try") return TokenType.try_; else break; default: break; } break; case 4: - switch (input) + switch (input[0]) { - case "auto": return TokenType.auto_; - case "body": return TokenType.body_; - case "bool": return TokenType.bool_; - case "byte": return TokenType.byte_; - case "case": return TokenType.case_; - case "cast": return TokenType.cast_; - case "cent": return TokenType.cent_; - case "char": return TokenType.char_; - case "else": return TokenType.else_; - case "enum": return TokenType.enum_; - case "goto": return TokenType.goto_; - case "lazy": return TokenType.lazy_; - case "long": return TokenType.long_; - case "null": return TokenType.null_; - case "pure": return TokenType.pure_; - case "real": return TokenType.real_; - case "this": return TokenType.this_; - case "true": return TokenType.true_; - case "uint": return TokenType.uint_; - case "void": return TokenType.void_; - case "with": return TokenType.with_; + case 'a': if (input == "auto") return TokenType.auto_; else break; + case 'b': if (input == "body") return TokenType.body_; + else if (input == "bool") return TokenType.bool_; + else if (input == "byte") return TokenType.byte_; + else break; + case 'c': if (input == "case") return TokenType.case_; + else if (input == "cast") return TokenType.cast_; + else if (input == "cent") return TokenType.cent_; + else if (input == "char") return TokenType.char_; + else break; + case 'e': if (input == "else") return TokenType.else_; + else if (input == "enum") return TokenType.enum_; + else break; + case 'g': if (input == "goto") return TokenType.goto_; else break; + case 'l': if (input == "lazy") return TokenType.lazy_; + else if (input == "long") return TokenType.long_; + else break; + case 
'n': if (input == "null") return TokenType.null_; else break; + case 'p': if (input == "pure") return TokenType.pure_; else break; + case 'r': if (input == "real") return TokenType.real_; else break; + case 't': if (input == "this") return TokenType.this_; + else if (input == "true") return TokenType.true_; + else break; + case 'u': if (input == "uint") return TokenType.uint_; else break; + case 'v': if (input == "void") return TokenType.void_; else break; + case 'w': if (input == "with") return TokenType.with_; else break; default: break; } break; case 5: - switch (input) + switch (input[0]) { - case "alias": return TokenType.alias_; - case "align": return TokenType.align_; - case "break": return TokenType.break_; - case "catch": return TokenType.catch_; - case "class": return TokenType.class_; - case "const": return TokenType.const_; - case "creal": return TokenType.creal_; - case "dchar": return TokenType.dchar_; - case "debug": return TokenType.debug_; - case "false": return TokenType.false_; - case "final": return TokenType.final_; - case "float": return TokenType.float_; - case "inout": return TokenType.inout_; - case "ireal": return TokenType.ireal_; - case "macro": return TokenType.macro_; - case "mixin": return TokenType.mixin_; - case "scope": return TokenType.scope_; - case "short": return TokenType.short_; - case "super": return TokenType.super_; - case "throw": return TokenType.throw_; - case "ubyte": return TokenType.ubyte_; - case "ucent": return TokenType.ucent_; - case "ulong": return TokenType.ulong_; - case "union": return TokenType.union_; - case "wchar": return TokenType.wchar_; - case "while": return TokenType.while_; + case 'a': if (input == "alias") return TokenType.alias_; + else if (input == "align") return TokenType.align_; else break; + case 'b': if (input == "break") return TokenType.break_; else break; + case 'c': if (input == "catch") return TokenType.catch_; + else if (input == "class") return TokenType.class_; + else if (input == "const") return TokenType.const_; + else if (input == "creal") return TokenType.creal_; + else break; + case 'd': if (input == "dchar") return TokenType.dchar_; + else if (input == "debug") return TokenType.debug_; else break; + case 'f': if (input == "false") return TokenType.false_; + else if (input == "final") return TokenType.final_; + else if (input == "float") return TokenType.float_; + else break; + case 'i': if (input == "inout") return TokenType.inout_; + else if (input == "ireal") return TokenType.ireal_; else break; + case 'm': if (input == "macro") return TokenType.macro_; + else if (input == "mixin") return TokenType.mixin_; else break; + case 's': if (input == "scope") return TokenType.scope_; + else if (input == "short") return TokenType.short_; + else if (input == "super") return TokenType.super_; else break; + case 't': if (input == "throw") return TokenType.throw_; else break; + case 'u': if (input == "ubyte") return TokenType.ubyte_; + else if (input == "ucent") return TokenType.ucent_; + else if (input == "ulong") return TokenType.ulong_; + else if (input == "union") return TokenType.union_; + else break; + case 'w': if (input == "wchar") return TokenType.wchar_; + else if (input == "while") return TokenType.while_; + else break; default: break; } break; case 6: - switch (input) + switch (input[0]) { - case "assert": return TokenType.assert_; - case "cfloat": return TokenType.cfloat_; - case "delete": return TokenType.delete_; - case "double": return TokenType.double_; - case "export": return TokenType.export_; 
     case 6:
-        switch (input)
+        switch (input[0])
         {
-        case "assert": return TokenType.assert_;
-        case "cfloat": return TokenType.cfloat_;
-        case "delete": return TokenType.delete_;
-        case "double": return TokenType.double_;
-        case "export": return TokenType.export_;
-        case "extern": return TokenType.extern_;
-        case "ifloat": return TokenType.ifloat_;
-        case "import": return TokenType.import_;
-        case "module": return TokenType.module_;
-        case "pragma": return TokenType.pragma_;
-        case "public": return TokenType.public_;
-        case "return": return TokenType.return_;
-        case "shared": return TokenType.shared_;
-        case "static": return TokenType.static_;
-        case "struct": return TokenType.struct_;
-        case "switch": return TokenType.switch_;
-        case "typeid": return TokenType.typeid_;
-        case "typeof": return TokenType.typeof_;
-        case "ushort": return TokenType.ushort_;
+        case 'a': if (input == "assert") return TokenType.assert_; else break;
+        case 'c': if (input == "cfloat") return TokenType.cfloat_; else break;
+        case 'd': if (input == "delete") return TokenType.delete_;
+            else if (input == "double") return TokenType.double_; else break;
+        case 'e': if (input == "export") return TokenType.export_;
+            else if (input == "extern") return TokenType.extern_; else break;
+        case 'i': if (input == "ifloat") return TokenType.ifloat_;
+            else if (input == "import") return TokenType.import_; else break;
+        case 'm': if (input == "module") return TokenType.module_; else break;
+        case 'p': if (input == "pragma") return TokenType.pragma_;
+            else if (input == "public") return TokenType.public_; else break;
+        case 'r': if (input == "return") return TokenType.return_; else break;
+        case 's': if (input == "shared") return TokenType.shared_;
+            else if (input == "static") return TokenType.static_;
+            else if (input == "struct") return TokenType.struct_;
+            else if (input == "switch") return TokenType.switch_; else break;
+        case 't': if (input == "typeid") return TokenType.typeid_;
+            else if (input == "typeof") return TokenType.typeof_; else break;
+        case 'u': if (input == "ushort") return TokenType.ushort_; else break;
         default: break;
         }
         break;
     case 7:
-        switch (input)
+        switch (input[0])
         {
-        case "__EOF__": return TokenType.eof;
-        case "cdouble": return TokenType.cdouble_;
-        case "default": return TokenType.default_;
-        case "finally": return TokenType.finally_;
-        case "foreach": return TokenType.foreach_;
-        case "idouble": return TokenType.idouble_;
-        case "nothrow": return TokenType.nothrow_;
-        case "package": return TokenType.package_;
-        case "private": return TokenType.private_;
-        case "typedef": return TokenType.typedef_;
-        case "version": return TokenType.version_;
+        case '_': if (input == "__EOF__") return TokenType.eof; else break;
+        case 'c': if (input == "cdouble") return TokenType.cdouble_; else break;
+        case 'd': if (input == "default") return TokenType.default_; else break;
+        case 'f': if (input == "finally") return TokenType.finally_;
+            else if (input == "foreach") return TokenType.foreach_; else break;
+        case 'i': if (input == "idouble") return TokenType.idouble_; else break;
+        case 'n': if (input == "nothrow") return TokenType.nothrow_; else break;
+        case 'p': if (input == "package") return TokenType.package_;
+            else if (input == "private") return TokenType.private_; else break;
+        case 't': if (input == "typedef") return TokenType.typedef_; else break;
+        case 'v': if (input == "version") return TokenType.version_; else break;
         default: break;
         }
         break;
     case 8:
-        switch (input)
+        switch (input[0])
         {
-        case "override": return TokenType.override_;
-        case "continue": return TokenType.continue_;
-        case "__LINE__": return TokenType.line;
-        case "template": return TokenType.template_;
-        case "abstract": return TokenType.abstract_;
-        case "__traits": return TokenType.traits;
-        case "volatile": return TokenType.volatile_;
-        case "delegate": return TokenType.delegate_;
-        case "function": return TokenType.function_;
-        case "unittest": return TokenType.unittest_;
-        case "__FILE__": return TokenType.file;
-        case "__DATE__": return TokenType.date;
-        case "__TIME__": return TokenType.time;
+        case '_': if (input == "__DATE__") return TokenType.date;
+            else if (input == "__FILE__") return TokenType.file;
+            else if (input == "__LINE__") return TokenType.line;
+            else if (input == "__TIME__") return TokenType.time;
+            else if (input == "__traits") return TokenType.traits; else break;
+        case 'a': if (input == "abstract") return TokenType.abstract_; else break;
+        case 'c': if (input == "continue") return TokenType.continue_; else break;
+        case 'd': if (input == "delegate") return TokenType.delegate_; else break;
+        case 'f': if (input == "function") return TokenType.function_; else break;
+        case 'o': if (input == "override") return TokenType.override_; else break;
+        case 't': if (input == "template") return TokenType.template_; else break;
+        case 'u': if (input == "unittest") return TokenType.unittest_; else break;
+        case 'v': if (input == "volatile") return TokenType.volatile_; else break;
         default: break;
         }
         break;
     case 9:
-        switch (input)
+        switch (input[0])
         {
-        case "__gshared": return TokenType.gshared;
-        case "immutable": return TokenType.immutable_;
-        case "interface": return TokenType.interface_;
-        case "invariant": return TokenType.invariant_;
-        case "protected": return TokenType.protected_;
+        case '_': if (input == "__gshared") return TokenType.gshared; else break;
+        case 'i': if (input == "immutable") return TokenType.immutable_;
+            else if (input == "interface") return TokenType.interface_;
+            else if (input == "invariant") return TokenType.invariant_; else break;
+        case 'p': if (input == "protected") return TokenType.protected_; else break;
         default: break;
         }
        break;
     case 10:
-        switch (input)
+        switch (input[0])
         {
-        case "deprecated": return TokenType.deprecated_;
-        case "__VENDOR__": return TokenType.vendor;
+        case 'd': if (input == "deprecated") return TokenType.deprecated_; else break;
+        case '_': if (input == "__VENDOR__") return TokenType.vendor; else break;
         default: break;
         }
         break;
@@ -2329,12 +2338,12 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
         caseStatement ~= indentString;
         caseStatement ~= "\t{\n";
         caseStatement ~= indentString;
-        caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
+        caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n";
         caseStatement ~= indentString;
         caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value;
         caseStatement ~= ";\n";
         caseStatement ~= indentString;
-        caseStatement ~= "\t\tbreak;\n";
+        caseStatement ~= "\t\treturn;\n";
         caseStatement ~= indentString;
         caseStatement ~= "\t}\n";
         caseStatement ~= indentString;
@@ -2349,13 +2358,13 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
         caseStatement ~= v.value;
         caseStatement ~= ";\n";
         caseStatement ~= indentString;
-        caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
+        caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n";
         caseStatement ~= indentString;
-        caseStatement ~= "\t\tbreak;\n";
+        caseStatement ~= "\t\treturn;\n";
         caseStatement ~= indentString;
         caseStatement ~= "\t}\n";
-        caseStatement ~= indentString;
-        caseStatement ~= "\tbreak;\n";
+//      caseStatement ~= indentString;
+//      caseStatement ~= "\treturn;\n";
     }
     else
     {
@@ -2366,7 +2375,7 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
         caseStatement ~= indentString;
         caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n";
         caseStatement ~= indentString;
-        caseStatement ~= "\tbreak;\n";
+        caseStatement ~= "\treturn;\n";
     }
     }
     return caseStatement;
@@ -2384,62 +2393,54 @@ string generateCaseTrie(string[] args ...)
 struct StringCache
 {
+    string get(const ubyte[] bytes)
+    {
-    void initialize()
-    {
-        pages.length = 1;
-    }
-
-    string get(ubyte[] bytes)
-    {
-
-        import std.stdio;
-        string* val = (cast(string) bytes) in index;
-        if (val !is null)
-        {
-            return *val;
-        }
-        else
-        {
-            auto s = insert(bytes);
-            index[s] = s;
-            return s;
-        }
-    }
+        import std.stdio;
+        size_t bucket;
+        hash_t h;
+        string* val = find(bytes, bucket, h);
+        if (val !is null)
+        {
+            return *val;
+        }
+        else
+        {
+            auto s = (cast(char[]) bytes).idup;
+            index[bucket] ~= s;
+            return s;
+        }
+    }
 private:
-    immutable pageSize = 1024 * 256;
+    string* find(const ubyte[] data, out size_t bucket, out hash_t h)
+    {
+        h = hash(data);
+        bucket = h % mapSize;
+        foreach (i; 0 .. index[bucket].length)
+        {
+            if (index[bucket][i] == data)
+                return &index[bucket][i];
+        }
+        return null;
+    }
-    string insert(ubyte[] bytes)
-    {
-        if (bytes.length >= pageSize)
-            assert(false);
-        size_t last = pages.length - 1;
-        Page* p = &(pages[last]);
-        size_t free = p.data.length - p.lastUsed;
-        if (free >= bytes.length)
-        {
-            p.data[p.lastUsed .. (p.lastUsed + bytes.length)] = bytes;
-            p.lastUsed += bytes.length;
-            return cast(immutable(char)[]) p.data[p.lastUsed - bytes.length .. p.lastUsed];
-        }
-        else
-        {
-            pages.length++;
-            pages[pages.length - 1].data[0 .. bytes.length] = bytes;
-            pages[pages.length - 1].lastUsed = bytes.length;
-            return cast(immutable(char)[]) pages[pages.length - 1].data[0 .. bytes.length];
-        }
-    }
+    static hash_t hash(const(ubyte)[] data)
+    {
+        hash_t h = 5381;
+        int c;
+        size_t i;
+        while (i < data.length)
+        {
+            c = data[i++];
+            h = ((h << 5) + h) ^ c;
+        }
+        return h;
+    }
-    struct Page
-    {
-        ubyte[pageSize] data = void;
-        size_t lastUsed;
-    }
-    Page[] pages;
-    string[string] index;
+    immutable mapSize = 997;
+    string[][mapSize] index;
 }
 //void main(string[] args) {}
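The rewritten StringCache above replaces the page-based allocator with a fixed table of 997 buckets: get() hashes the byte slice with a djb2-style hash (xor variant, h = ((h << 5) + h) ^ c), scans that bucket for an existing entry, and only duplicates the bytes when the text has not been seen before, so repeated occurrences of the same identifier share one interned string. A small usage sketch follows; it assumes the StringCache from the hunk above is in scope, and the two buffers are only there to show that equal content, not object identity, drives the lookup.

    unittest
    {
        StringCache cache;
        // Two physically different buffers holding the same bytes.
        string s1 = cache.get(cast(const ubyte[]) "foreach");
        string s2 = cache.get(cast(const ubyte[]) "foreach".dup);
        assert (s1 == "foreach");
        assert (s1.ptr is s2.ptr); // second call returns the already interned copy
    }

Within a bucket the lookup is a linear scan over an append-only array, which presumably stays cheap for the number of distinct identifiers a single module produces.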