diff --git a/autocomplete.d b/autocomplete.d index 7b5e33b..b408fdc 100644 --- a/autocomplete.d +++ b/autocomplete.d @@ -13,11 +13,11 @@ import std.stdio; import std.typecons; import std.path; import std.file; +import std.d.lexer; import parser; import langutils; import types; -import tokenizer; immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", "Cygwin", "D_Coverage", "D_Ddoc", "DigitalMars", "D_InlineAsm_X86", @@ -31,348 +31,348 @@ immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", immutable string[] scopes = ["exit", "failure", "success"]; -/** - * Returns: indicies into the token array - */ -size_t findEndOfExpression(const Token[] tokens, const size_t index) -out (result) -{ - assert (result < tokens.length); - assert (result >= index); -} -body -{ - size_t i = index; - loop: while (i < tokens.length) - { - switch (tokens[i].type) - { - case TokenType.Return: - case TokenType.New: - case TokenType.Delete: - case TokenType.Comma: - case TokenType.RBrace: - case TokenType.RParen: - case TokenType.RBracket: - case TokenType.Semicolon: - break loop; - case TokenType.LParen: - skipParens(tokens, i); - break; - case TokenType.LBrace: - skipBraces(tokens, i); - break; - case TokenType.LBracket: - skipBrackets(tokens, i); - break; - default: - ++i; - break; - } - } - return i; -} - -size_t findBeginningOfExpression(const Token[] tokens, const size_t index) -in -{ - assert (index < tokens.length); - assert (tokens.length > 0); -} -out (result) -{ - import std.string; - assert (result < tokens.length); - assert (result <= index, format("findBeginningOfExpression %d, %d", result, index)); -} -body -{ - size_t i = index; - loop: while (i < tokens.length) - { - switch (tokens[i].type) - { - case TokenType.Assign: case TokenType.BitAnd: case TokenType.BitAndEquals: - case TokenType.BitOr: case TokenType.BitOrEquals: case TokenType.CatEquals: - case TokenType.Colon: case TokenType.Comma: case TokenType.Decrement: - 
case TokenType.Div: case TokenType.DivEquals: case TokenType.Dollar: - case TokenType.Equals: case TokenType.GoesTo: - case TokenType.Greater: case TokenType.GreaterEqual: case TokenType.Hash: - case TokenType.Increment: case TokenType.LBrace: case TokenType.LBracket: - case TokenType.Less: case TokenType.LessEqual: case TokenType.LessEqualGreater: - case TokenType.LessOrGreater: case TokenType.LogicAnd: case TokenType.LogicOr: - case TokenType.LParen: case TokenType.Minus: case TokenType.MinusEquals: - case TokenType.Mod: case TokenType.ModEquals: case TokenType.MulEquals: - case TokenType.Not: case TokenType.NotEquals: case TokenType.NotGreater: - case TokenType.NotGreaterEqual: case TokenType.NotLess: case TokenType.NotLessEqual: - case TokenType.NotLessEqualGreater: case TokenType.Plus: case TokenType.PlusEquals: - case TokenType.Pow: case TokenType.PowEquals: case TokenType.RBrace: - case TokenType.Semicolon: case TokenType.ShiftLeft: case TokenType.ShiftLeftEqual: - case TokenType.ShiftRight: case TokenType.ShiftRightEqual: case TokenType.Slice: - case TokenType.Star: case TokenType.Ternary: case TokenType.Tilde: - case TokenType.Unordered: case TokenType.UnsignedShiftRight: case TokenType.UnsignedShiftRightEqual: - case TokenType.Vararg: case TokenType.Xor: case TokenType.XorEquals: - case TokenType.KEYWORDS_BEGIN: .. 
case TokenType.KEYWORDS_END: - return i + 1; - case TokenType.RParen: - if (i == 0) - break loop; - skipParens(tokens, i); - break; - case TokenType.RBracket: - if (i == 0) - break loop; - skipBrackets(tokens, i); - break; - default: - if (i == 0) - break loop; - i--; - break; - } - } - return i + 1; -} - -const(Token)[] splitCallChain(const(Token)[] tokens) -{ - auto app = appender!(Token[])(); - size_t i = 0; - while (i < tokens.length) - { - app.put(tokens[i++]); - while (i < tokens.length && tokens[i] == TokenType.LParen) skipParens(tokens, i); - while (i < tokens.length && tokens[i] == TokenType.LBracket) skipBrackets(tokens, i); - while (i < tokens.length && tokens[i] == TokenType.Dot) ++i; - } - return app.data; -} - -unittest -{ - auto code = `a.b[10].c("grcl").x`; - auto tokens = tokenize(code); - assert (splitCallChain(tokens) == ["a", "b", "c", "x"]); -} - -struct AutoComplete -{ - this(const (Token)[] tokens, CompletionContext context) - { - this.tokens = tokens; - this.context = context; - } - - string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor) - { - stderr.writeln("getting type of ", expression); - if (expression.length == 0) - return "void"; - auto type = typeOfVariable(expression[0], cursor); - if (type is null) - return "void"; - size_t index = 1; - while (index < expression.length) - { - const Tuple!(string, string)[string] typeMap = context.getMembersOfType( - type); - const Tuple!(string, string)* memberType = expression[index].value in typeMap; - if (memberType is null) - return "void"; - else - type = (*memberType)[0]; - index++; - } - return type; - } - - string typeOfVariable(Token symbol, size_t cursor) - { - // int is of type int, double of type double, and so on - if (symbol.value in typeProperties) - return symbol.value; - - string tokenType = getTypeFromToken(symbol); - if (tokenType !is null) - return tokenType; - - if (context.getMembersOfType(symbol.value)) - return symbol.value; - - // 
Arbitrarily define the depth of the cursor position as zero - // iterate backwards through the code to try to find the variable - int depth = 0; - auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); - auto index = preceedingTokens.length - 1; - while (true) - { - if (preceedingTokens[index] == TokenType.LBrace) - --depth; - else if (preceedingTokens[index] == TokenType.RBrace) - ++depth; - else if (depth <= 0 && preceedingTokens[index].value == symbol) - { - // Found the symbol, now determine if it was declared here. - auto p = preceedingTokens[index - 1]; - - - if ((p == TokenType.Auto || p == TokenType.Immutable - || p == TokenType.Const) - && preceedingTokens[index + 1] == TokenType.Assign) - { - // Try to determine the type of a variable declared as "auto" - return getTypeOfExpression( - tokens[index + 2 .. findEndOfExpression(tokens, index + 2)], - tokens, cursor); - } - else if (p == TokenType.Identifier - || (p.type > TokenType.TYPES_BEGIN - && p.type < TokenType.TYPES_END)) - { - // Handle simple cases like "int a;" or "Someclass instance;" - return p.value; - } - else if (p == TokenType.RBracket || p == TokenType.RParen) - { - return combineTokens(tokens[findBeginningOfExpression(tokens, index) .. index]); - } - } - if (index == 0) - break; - else - --index; - } - - // Find all struct or class bodies that we're in. 
- // Check for the symbol in those class/struct/interface bodies - // if match is found, return it - auto structs = context.getStructsContaining(cursor); - if (symbol == "this" && structs.length > 0) - { - return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; - } - - foreach (s; structs) - { - auto t = s.getMemberType(symbol.value); - if (t !is null) - return t; - } - return "void"; - } - - string symbolAt(size_t cursor) const - { - auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; - if (r.value.length + r.startIndex > cursor) - return r.value; - else - return null; - } - - string parenComplete(size_t cursor) - { - auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; - Token t = tokens[index]; - switch (tokens[index].type) - { - case TokenType.Version: - return "completions\n" ~ to!string(join(map!`a ~ " k"`(versions), "\n").array()); - case TokenType.Scope: - return "completions\n" ~ to!string(join(map!`a ~ " k"`(scopes), "\n").array()); - case TokenType.If: - case TokenType.Cast: - case TokenType.While: - case TokenType.For: - case TokenType.Foreach: - case TokenType.Switch: - return ""; - default: - size_t startIndex = findBeginningOfExpression(tokens, index); - auto callChain = splitCallChain(tokens[startIndex .. index + 1]); - auto expressionType = getTypeOfExpression( - callChain[0 .. $ - 1], tokens, cursor); - return "calltips\n" ~ to!string(context.getCallTipsFor(expressionType, - callChain[$ - 1].value, cursor).join("\n").array()); - } - } - - string dotComplete(size_t cursor) - { - stderr.writeln("dotComplete"); - auto index = assumeSorted(tokens).lowerBound(cursor).length - 1; - Token t = tokens[index]; - - // If the last character entered before the cursor isn't a dot, give up. 
- // The user was probably in the middle of typing the slice or vararg - // operators - if (t != TokenType.Dot) - return null; - - size_t startIndex = findBeginningOfExpression(tokens, index); - if (startIndex - 1 < tokens.length && tokens[startIndex - 1] == TokenType.Import) - { - return importComplete(splitCallChain(tokens[startIndex .. index])); - } - - auto expressionType = getTypeOfExpression( - splitCallChain(tokens[startIndex .. index]), tokens, cursor); - - stderr.writeln("expression type is ", expressionType); - - // Complete pointers and references the same way - if (expressionType[$ - 1] == '*') - expressionType = expressionType[0 .. $ - 1]; - - const Tuple!(string, string)[string] typeMap = context.getMembersOfType( - expressionType); - if (typeMap is null) - return ""; - auto app = appender!(string[])(); - foreach (k, t; typeMap) - app.put(k ~ " " ~ t[1]); - return to!string(array(join(sort!("a.toLower() < b.toLower()")(app.data), "\n"))); - } - - string importComplete(const(Token)[] tokens) - { - stderr.writeln("importComplete"); - auto app = appender!(string[])(); - string part = to!string(map!"a.value.dup"(tokens).join("/").array()); - foreach (path; context.importDirectories) - { - stderr.writeln("Searching for ", path, "/", part); - if (!exists(buildPath(path, part))) - continue; - stderr.writeln("found it"); - foreach (DirEntry dirEntry; dirEntries(buildPath(path, part), - SpanMode.shallow)) - { - if (dirEntry.isDir) - app.put(baseName(dirEntry.name) ~ " P"); - else if (dirEntry.name.endsWith(".d", ".di")) - app.put(stripExtension(baseName(dirEntry.name)) ~ " M"); - } - } - return to!string(sort!("a.toLower() < b.toLower()")(app.data).join("\n").array()); - } - - const(Token)[] tokens; - CompletionContext context; -} - -unittest -{ - auto code = q{ -struct TestStruct { int a; int b; } -TestStruct ts; -ts.a. 
- }; - - auto tokens = tokenize(code); - auto mod = parseModule(tokens); - auto context = new CompletionContext(mod); - auto completion = AutoComplete(tokens, context); - assert (completion.getTypeOfExpression(splitCallChain(tokens[13 .. 16]), - tokens, 56) == "int"); -} +///** +// * Returns: indicies into the token array +// */ +//size_t findEndOfExpression(const Token[] tokens, const size_t index) +//out (result) +//{ +// assert (result < tokens.length); +// assert (result >= index); +//} +//body +//{ +// size_t i = index; +// loop: while (i < tokens.length) +// { +// switch (tokens[i].type) +// { +// case TokenType.Return: +// case TokenType.New: +// case TokenType.Delete: +// case TokenType.Comma: +// case TokenType.RBrace: +// case TokenType.RParen: +// case TokenType.RBracket: +// case TokenType.Semicolon: +// break loop; +// case TokenType.LParen: +// skipParens(tokens, i); +// break; +// case TokenType.LBrace: +// skipBraces(tokens, i); +// break; +// case TokenType.LBracket: +// skipBrackets(tokens, i); +// break; +// default: +// ++i; +// break; +// } +// } +// return i; +//} +// +//size_t findBeginningOfExpression(const Token[] tokens, const size_t index) +//in +//{ +// assert (index < tokens.length); +// assert (tokens.length > 0); +//} +//out (result) +//{ +// import std.string; +// assert (result < tokens.length); +// assert (result <= index, format("findBeginningOfExpression %d, %d", result, index)); +//} +//body +//{ +// size_t i = index; +// loop: while (i < tokens.length) +// { +// switch (tokens[i].type) +// { +// case TokenType.Assign: case TokenType.BitAnd: case TokenType.BitAndEquals: +// case TokenType.BitOr: case TokenType.BitOrEquals: case TokenType.CatEquals: +// case TokenType.Colon: case TokenType.Comma: case TokenType.Decrement: +// case TokenType.Div: case TokenType.DivEquals: case TokenType.Dollar: +// case TokenType.Equals: case TokenType.GoesTo: +// case TokenType.Greater: case TokenType.GreaterEqual: case TokenType.Hash: +// case 
TokenType.Increment: case TokenType.LBrace: case TokenType.LBracket: +// case TokenType.Less: case TokenType.LessEqual: case TokenType.LessEqualGreater: +// case TokenType.LessOrGreater: case TokenType.LogicAnd: case TokenType.LogicOr: +// case TokenType.LParen: case TokenType.Minus: case TokenType.MinusEquals: +// case TokenType.Mod: case TokenType.ModEquals: case TokenType.MulEquals: +// case TokenType.Not: case TokenType.NotEquals: case TokenType.NotGreater: +// case TokenType.NotGreaterEqual: case TokenType.NotLess: case TokenType.NotLessEqual: +// case TokenType.NotLessEqualGreater: case TokenType.Plus: case TokenType.PlusEquals: +// case TokenType.Pow: case TokenType.PowEquals: case TokenType.RBrace: +// case TokenType.Semicolon: case TokenType.ShiftLeft: case TokenType.ShiftLeftEqual: +// case TokenType.ShiftRight: case TokenType.ShiftRightEqual: case TokenType.Slice: +// case TokenType.Star: case TokenType.Ternary: case TokenType.Tilde: +// case TokenType.Unordered: case TokenType.UnsignedShiftRight: case TokenType.UnsignedShiftRightEqual: +// case TokenType.Vararg: case TokenType.Xor: case TokenType.XorEquals: +// case TokenType.KEYWORDS_BEGIN: .. 
case TokenType.KEYWORDS_END: +// return i + 1; +// case TokenType.RParen: +// if (i == 0) +// break loop; +// skipParens(tokens, i); +// break; +// case TokenType.RBracket: +// if (i == 0) +// break loop; +// skipBrackets(tokens, i); +// break; +// default: +// if (i == 0) +// break loop; +// i--; +// break; +// } +// } +// return i + 1; +//} +// +//const(Token)[] splitCallChain(const(Token)[] tokens) +//{ +// auto app = appender!(Token[])(); +// size_t i = 0; +// while (i < tokens.length) +// { +// app.put(tokens[i++]); +// while (i < tokens.length && tokens[i] == TokenType.LParen) skipParens(tokens, i); +// while (i < tokens.length && tokens[i] == TokenType.LBracket) skipBrackets(tokens, i); +// while (i < tokens.length && tokens[i] == TokenType.Dot) ++i; +// } +// return app.data; +//} +// +//unittest +//{ +// auto code = `a.b[10].c("grcl").x`; +// auto tokens = tokenize(code); +// assert (splitCallChain(tokens) == ["a", "b", "c", "x"]); +//} +// +//struct AutoComplete +//{ +// this(const (Token)[] tokens, CompletionContext context) +// { +// this.tokens = tokens; +// this.context = context; +// } +// +// string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor) +// { +// stderr.writeln("getting type of ", expression); +// if (expression.length == 0) +// return "void"; +// auto type = typeOfVariable(expression[0], cursor); +// if (type is null) +// return "void"; +// size_t index = 1; +// while (index < expression.length) +// { +// const Tuple!(string, string)[string] typeMap = context.getMembersOfType( +// type); +// const Tuple!(string, string)* memberType = expression[index].value in typeMap; +// if (memberType is null) +// return "void"; +// else +// type = (*memberType)[0]; +// index++; +// } +// return type; +// } +// +// string typeOfVariable(Token symbol, size_t cursor) +// { +// // int is of type int, double of type double, and so on +// if (symbol.value in typeProperties) +// return symbol.value; +// +// string 
tokenType = getTypeFromToken(symbol); +// if (tokenType !is null) +// return tokenType; +// +// if (context.getMembersOfType(symbol.value)) +// return symbol.value; +// +// // Arbitrarily define the depth of the cursor position as zero +// // iterate backwards through the code to try to find the variable +// int depth = 0; +// auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); +// auto index = preceedingTokens.length - 1; +// while (true) +// { +// if (preceedingTokens[index] == TokenType.LBrace) +// --depth; +// else if (preceedingTokens[index] == TokenType.RBrace) +// ++depth; +// else if (depth <= 0 && preceedingTokens[index].value == symbol) +// { +// // Found the symbol, now determine if it was declared here. +// auto p = preceedingTokens[index - 1]; +// +// +// if ((p == TokenType.Auto || p == TokenType.Immutable +// || p == TokenType.Const) +// && preceedingTokens[index + 1] == TokenType.Assign) +// { +// // Try to determine the type of a variable declared as "auto" +// return getTypeOfExpression( +// tokens[index + 2 .. findEndOfExpression(tokens, index + 2)], +// tokens, cursor); +// } +// else if (p == TokenType.Identifier +// || (p.type > TokenType.TYPES_BEGIN +// && p.type < TokenType.TYPES_END)) +// { +// // Handle simple cases like "int a;" or "Someclass instance;" +// return p.value; +// } +// else if (p == TokenType.RBracket || p == TokenType.RParen) +// { +// return combineTokens(tokens[findBeginningOfExpression(tokens, index) .. index]); +// } +// } +// if (index == 0) +// break; +// else +// --index; +// } +// +// // Find all struct or class bodies that we're in. 
+// // Check for the symbol in those class/struct/interface bodies +// // if match is found, return it +// auto structs = context.getStructsContaining(cursor); +// if (symbol == "this" && structs.length > 0) +// { +// return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; +// } +// +// foreach (s; structs) +// { +// auto t = s.getMemberType(symbol.value); +// if (t !is null) +// return t; +// } +// return "void"; +// } +// +// string symbolAt(size_t cursor) const +// { +// auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; +// if (r.value.length + r.startIndex > cursor) +// return r.value; +// else +// return null; +// } +// +// string parenComplete(size_t cursor) +// { +// auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; +// Token t = tokens[index]; +// switch (tokens[index].type) +// { +// case TokenType.Version: +// return "completions\n" ~ to!string(join(map!`a ~ " k"`(versions), "\n").array()); +// case TokenType.Scope: +// return "completions\n" ~ to!string(join(map!`a ~ " k"`(scopes), "\n").array()); +// case TokenType.If: +// case TokenType.Cast: +// case TokenType.While: +// case TokenType.For: +// case TokenType.Foreach: +// case TokenType.Switch: +// return ""; +// default: +// size_t startIndex = findBeginningOfExpression(tokens, index); +// auto callChain = splitCallChain(tokens[startIndex .. index + 1]); +// auto expressionType = getTypeOfExpression( +// callChain[0 .. $ - 1], tokens, cursor); +// return "calltips\n" ~ to!string(context.getCallTipsFor(expressionType, +// callChain[$ - 1].value, cursor).join("\n").array()); +// } +// } +// +// string dotComplete(size_t cursor) +// { +// stderr.writeln("dotComplete"); +// auto index = assumeSorted(tokens).lowerBound(cursor).length - 1; +// Token t = tokens[index]; +// +// // If the last character entered before the cursor isn't a dot, give up. 
+// // The user was probably in the middle of typing the slice or vararg +// // operators +// if (t != TokenType.Dot) +// return null; +// +// size_t startIndex = findBeginningOfExpression(tokens, index); +// if (startIndex - 1 < tokens.length && tokens[startIndex - 1] == TokenType.Import) +// { +// return importComplete(splitCallChain(tokens[startIndex .. index])); +// } +// +// auto expressionType = getTypeOfExpression( +// splitCallChain(tokens[startIndex .. index]), tokens, cursor); +// +// stderr.writeln("expression type is ", expressionType); +// +// // Complete pointers and references the same way +// if (expressionType[$ - 1] == '*') +// expressionType = expressionType[0 .. $ - 1]; +// +// const Tuple!(string, string)[string] typeMap = context.getMembersOfType( +// expressionType); +// if (typeMap is null) +// return ""; +// auto app = appender!(string[])(); +// foreach (k, t; typeMap) +// app.put(k ~ " " ~ t[1]); +// return to!string(array(join(sort!("a.toLower() < b.toLower()")(app.data), "\n"))); +// } +// +// string importComplete(const(Token)[] tokens) +// { +// stderr.writeln("importComplete"); +// auto app = appender!(string[])(); +// string part = to!string(map!"a.value.dup"(tokens).join("/").array()); +// foreach (path; context.importDirectories) +// { +// stderr.writeln("Searching for ", path, "/", part); +// if (!exists(buildPath(path, part))) +// continue; +// stderr.writeln("found it"); +// foreach (DirEntry dirEntry; dirEntries(buildPath(path, part), +// SpanMode.shallow)) +// { +// if (dirEntry.isDir) +// app.put(baseName(dirEntry.name) ~ " P"); +// else if (dirEntry.name.endsWith(".d", ".di")) +// app.put(stripExtension(baseName(dirEntry.name)) ~ " M"); +// } +// } +// return to!string(sort!("a.toLower() < b.toLower()")(app.data).join("\n").array()); +// } +// +// const(Token)[] tokens; +// CompletionContext context; +//} +// +//unittest +//{ +// auto code = q{ +//struct TestStruct { int a; int b; } +//TestStruct ts; +//ts.a. 
+// }; +// +// auto tokens = tokenize(code); +// auto mod = parseModule(tokens); +// auto context = new CompletionContext(mod); +// auto completion = AutoComplete(tokens, context); +// assert (completion.getTypeOfExpression(splitCallChain(tokens[13 .. 16]), +// tokens, 56) == "int"); +//} diff --git a/build.sh b/build.sh index 2250998..f0f544f 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,2 @@ -dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline -#dmd *.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest +dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline +#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest diff --git a/cache.d b/cache.d index c2ea3fa..6398422 100644 --- a/cache.d +++ b/cache.d @@ -13,11 +13,11 @@ import std.uuid; import std.array; import std.string; import std.conv; +import std.d.lexer; import location; import parser; import types; -import tokenizer; private sqlite3* database; @@ -104,10 +104,10 @@ void updateCache(string dirs[], string moduleNames[]) if (timeLastModified.stdTime == mtime) continue; - // re-parse the module - Module m = parseModule(byToken(readText(filePath)).array()); - - updateCache(m); +// // re-parse the module +// Module m = parseModule(byToken(readText(filePath)).array()); +// +// updateCache(m); sqlite3_reset(statement); } diff --git a/circularbuffer.d b/circularbuffer.d index 714720a..9c45128 100644 --- a/circularbuffer.d +++ b/circularbuffer.d @@ -9,11 +9,12 @@ import std.math; import std.array; import std.range; -struct CircularBuffer(T, R) if (isInputRange!(R) && is (ElementType!(R) == T)) +class CircularBuffer(T) : InputRange!(T) + { public: - this (size_t size, R range) + this (size_t size, InputRange!(T) range) { this.range = range; this.margin = size; @@ -31,41 +32,33 @@ public: } } - T opIndex(size_t index) const - in - { - assert (index <= sourceIndex + margin); - assert (index >= 
sourceIndex - margin); - } - body - { - return data[index % data.length]; - } - - T front() const @property + override T front() const @property { return data[index]; } - T peek(int offset) + T peek(int offset = 1) in { - assert(abs(offset) <= margin); - assert(sourceIndex + offset >= 0); + assert(canPeek(offset)); } body { return data[(index + offset) % data.length]; } - T popFront() + bool canPeek(int offset = 1) + { + return abs(offset) <= margin && sourceIndex + offset >= 0; + } + + override void popFront() in { assert (!_empty); } body { - T v = data[index]; index = (index + 1) % data.length; ++sourceIndex; if (range.empty()) @@ -79,7 +72,6 @@ public: end = (end + 1) % data.length; range.popFront(); } - return v; } bool empty() const @property @@ -87,8 +79,40 @@ public: return _empty; } + override T moveFront() + { + auto r = front(); + popFront(); + return r; + } + + override int opApply(int delegate(T) dg) + { + int result = 0; + while (!empty) + { + result = dg(front); + if (result) + break; + } + return result; + } + + override int opApply(int delegate(size_t, T) dg) + { + int result = 0; + int i = 0; + while (!empty) + { + result = dg(i, front); + if (result) + break; + } + return result; + } + private: - R range; + InputRange!(T) range; immutable size_t margin; T[] data; size_t sourceIndex; @@ -123,8 +147,6 @@ unittest buf.popFront(); buf.popFront(); assert (buf.front == 4); - assert (buf[2] == 2); - assert (buf[6] == 6); } unittest diff --git a/codegen.d b/codegen.d deleted file mode 100644 index 822c5d3..0000000 --- a/codegen.d +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// This module triggers DMD bug 7900 if compiled with -inline - -module codegen; - -import std.range; - - -class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) -{ - /** - * Adds the given value to the trie with the given key - */ - void add(K key, V value) pure - { - TrieNode!(K,V) current = this; - foreach(keyPart; key) - { - if ((keyPart in current.children) is null) - { - auto node = new TrieNode!(K, V); - current.children[keyPart] = node; - current = node; - } - else - current = current.children[keyPart]; - } - current.value = value; - } -} - -class TrieNode(K, V) if (isInputRange!K) -{ - V value; - TrieNode!(K,V)[ElementType!K] children; -} - -string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString) -{ - string caseStatement = ""; - foreach(dchar k, TrieNode!(K,V) v; node.children) - { - caseStatement ~= indentString; - caseStatement ~= "case '"; - caseStatement ~= k; - caseStatement ~= "':\n"; - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.value ~= '"; - caseStatement ~= k; - caseStatement ~= "';\n"; - caseStatement ~= indentString; - caseStatement ~= "\t++index;\n"; - caseStatement ~= indentString; - caseStatement ~= "\trange.popFront();\n"; - if (v.children.length > 0) - { - caseStatement ~= indentString; - caseStatement ~= "\tif (range.isEoF())\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tbreak;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - caseStatement ~= indentString; - caseStatement ~= "\tswitch (range.front)\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= printCaseStatements(v, indentString ~ "\t"); - caseStatement ~= indentString; - caseStatement ~= 
"\tdefault:\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tbreak;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - caseStatement ~= indentString; - caseStatement ~= "\tbreak;\n"; - } - else - { - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\tbreak;\n"; - } - } - return caseStatement; -} - -string generateCaseTrie(string[] args ...) -{ - auto t = new Trie!(string, string); - for(int i = 0; i < args.length; i+=2) - { - t.add(args[i], args[i+1]); - } - return printCaseStatements(t, ""); -} diff --git a/highlighter.d b/highlighter.d index 032c3f4..03286f7 100644 --- a/highlighter.d +++ b/highlighter.d @@ -7,8 +7,10 @@ module highlighter; import std.stdio; -import langutils; import std.array; +import std.d.lexer; + +import langutils; void writeSpan(string cssClass, string value) { @@ -23,13 +25,13 @@ void highlight(R)(R tokens)
]"); diff --git a/langutils.d b/langutils.d index 1649f4a..cf0c995 100644 --- a/langutils.d +++ b/langutils.d @@ -4,7 +4,9 @@ // http://www.boost.org/LICENSE_1_0.txt) module langutils; + import std.array; +import std.d.lexer; /** @@ -33,18 +35,6 @@ pure nothrow bool isAttribute(TokenType input) return input > TokenType.ATTRIBUTES_BEGIN && input < TokenType.ATTRIBUTES_END; } -/** - * Returns: the token type for the given string. Defaults to "identifier" - */ -pure nothrow TokenType lookupTokenType(const string input) -{ - immutable(TokenType)* type = input in tokenLookup; - if (type !is null) - return *type; - else - return TokenType.Identifier; -} - string combineTokens(ref const Token[] tokens) { auto app = appender!string(); @@ -53,533 +43,7 @@ string combineTokens(ref const Token[] tokens) return app.data; } -pure nothrow TokenType lookupTokenTypeOptimized(const string input) -{ - switch(input.length) - { - case 2: - switch (input) - { - case "do": return TokenType.Do; - case "if": return TokenType.If; - case "in": return TokenType.In; - case "is": return TokenType.Is; - default: break; - } - break; - case 3: - switch (input) - { - case "asm": return TokenType.Asm; - case "for": return TokenType.For; - case "int": return TokenType.Int; - case "new": return TokenType.New; - case "out": return TokenType.Out; - case "ref": return TokenType.Ref; - case "try": return TokenType.Try; - default: break; - } - break; - case 4: - switch (input) - { - case "auto": return TokenType.Auto; - case "body": return TokenType.Body; - case "bool": return TokenType.Bool; - case "byte": return TokenType.Byte; - case "case": return TokenType.Case; - case "cast": return TokenType.Cast; - case "cent": return TokenType.Cent; - case "char": return TokenType.Char; - case "else": return TokenType.Else; - case "enum": return TokenType.Enum; - case "goto": return TokenType.Goto; - case "lazy": return TokenType.Lazy; - case "long": return TokenType.Long; - case "null": return TokenType.Null; - 
case "pure": return TokenType.Pure; - case "real": return TokenType.Real; - case "this": return TokenType.This; - case "true": return TokenType.True; - case "uint": return TokenType.Uint; - case "void": return TokenType.Void; - case "with": return TokenType.With; - default: break; - } - break; - case 5: - switch (input) - { - case "alias": return TokenType.Alias; - case "align": return TokenType.Align; - case "break": return TokenType.Break; - case "catch": return TokenType.Catch; - case "class": return TokenType.Class; - case "const": return TokenType.Const; - case "creal": return TokenType.Creal; - case "dchar": return TokenType.Dchar; - case "debug": return TokenType.Debug; - case "false": return TokenType.False; - case "final": return TokenType.Final; - case "float": return TokenType.Float; - case "inout": return TokenType.Inout; - case "ireal": return TokenType.Ireal; - case "macro": return TokenType.Macro; - case "mixin": return TokenType.Mixin; - case "scope": return TokenType.Scope; - case "short": return TokenType.Short; - case "super": return TokenType.Super; - case "throw": return TokenType.Throw; - case "ubyte": return TokenType.Ubyte; - case "ucent": return TokenType.Ucent; - case "ulong": return TokenType.Ulong; - case "union": return TokenType.Union; - case "wchar": return TokenType.Wchar; - case "while": return TokenType.While; - default: break; - } - break; - case 6: - switch (input) - { - case "assert": return TokenType.Assert; - case "cfloat": return TokenType.Cfloat; - case "delete": return TokenType.Delete; - case "double": return TokenType.Double; - case "export": return TokenType.Export; - case "extern": return TokenType.Extern; - case "ifloat": return TokenType.Ifloat; - case "import": return TokenType.Import; - case "module": return TokenType.Module; - case "pragma": return TokenType.Pragma; - case "public": return TokenType.Public; - case "return": return TokenType.Return; - case "shared": return TokenType.Shared; - case "static": return 
TokenType.Static; - case "string": return TokenType.String; - case "struct": return TokenType.Struct; - case "switch": return TokenType.Switch; - case "typeid": return TokenType.Typeid; - case "typeof": return TokenType.Typeof; - case "ushort": return TokenType.Ushort; - default: break; - } - break; - case 7: - switch (input) - { - case "cdouble": return TokenType.Cdouble; - case "default": return TokenType.Default; - case "dstring": return TokenType.DString; - case "finally": return TokenType.Finally; - case "foreach": return TokenType.Foreach; - case "idouble": return TokenType.Idouble; - case "nothrow": return TokenType.Nothrow; - case "package": return TokenType.Package; - case "private": return TokenType.Private; - case "typedef": return TokenType.Typedef; - case "version": return TokenType.Version; - case "wstring": return TokenType.WString; - default: break; - } - break; - case 8: - switch (input) - { - case "override": return TokenType.Override; - case "continue": return TokenType.Continue; - case "__LINE__": return TokenType.Line; - case "template": return TokenType.Template; - case "abstract": return TokenType.Abstract; - case "__thread": return TokenType.Thread; - case "__traits": return TokenType.Traits; - case "volatile": return TokenType.Volatile; - case "delegate": return TokenType.Delegate; - case "function": return TokenType.Function; - case "unittest": return TokenType.Unittest; - case "__FILE__": return TokenType.File; - default: break; - } - break; - case 9: - switch (input) - { - case "__gshared": return TokenType.Gshared; - case "immutable": return TokenType.Immutable; - case "interface": return TokenType.Interface; - case "invariant": return TokenType.Invariant; - case "protected": return TokenType.Protected; - default: break; - } - break; - case 10: - if (input == "deprecated") - return TokenType.Deprecated; - break; - case 11: - if (input == "synchronized") - return TokenType.Synchronized; - break; - case 13: - if (input == 
"foreach_reverse") - return TokenType.Foreach_reverse; - break; - default: break; - } - return TokenType.Identifier; -} - - -/** - * Listing of all the tokens in the D language - */ -enum TokenType: uint -{ -// Operators - OPERATORS_BEGIN, - Assign, /// = - At, /// @ - BitAnd, /// & - BitAndEquals, /// &= - BitOr, /// | - BitOrEquals, /// |= - CatEquals, /// ~= - Colon, /// : - Comma, /// , - Decrement, /// -- - Div, /// / - DivEquals, /// /= - Dollar, /// $ - Dot, /// . - Equals, /// == - GoesTo, // => - Greater, /// > - GreaterEqual, /// >= - Hash, // # - Increment, /// ++ - LBrace, /// { - LBracket, /// [ - Less, /// < - LessEqual, /// <= - LessEqualGreater, // <>= - LessOrGreater, /// <> - LogicAnd, /// && - LogicOr, /// || - LParen, /// $(LPAREN) - Minus, /// - - MinusEquals, /// -= - Mod, /// % - ModEquals, /// %= - MulEquals, /// *= - Not, /// ! - NotEquals, /// != - NotGreater, /// !> - NotGreaterEqual, /// !>= - NotLess, /// !< - NotLessEqual, /// !<= - NotLessEqualGreater, /// !<> - Plus, /// + - PlusEquals, /// += - Pow, /// ^^ - PowEquals, /// ^^= - RBrace, /// } - RBracket, /// ] - RParen, /// $(RPAREN) - Semicolon, /// ; - ShiftLeft, /// << - ShiftLeftEqual, /// <<= - ShiftRight, /// >> - ShiftRightEqual, /// >>= - Slice, // .. - Star, /// * - Ternary, /// ? - Tilde, /// ~ - Unordered, /// !<>= - UnsignedShiftRight, /// >>> - UnsignedShiftRightEqual, /// >>>= - Vararg, /// ... 
- Xor, /// ^ - XorEquals, /// ^= - OPERATORS_END, - - // Types - TYPES_BEGIN, - Bool, /// bool, - Byte, /// byte, - Cdouble, /// cdouble, - Cent, /// cent, - Cfloat, /// cfloat, - Char, /// char, - Creal, /// creal, - Dchar, /// dchar, - Double, /// double, - DString, /// dstring - Float, /// float, - Function, /// function, - Idouble, /// idouble, - Ifloat, /// ifloat, - Int, /// int, - Ireal, /// ireal, - Long, /// long, - Real, /// real, - Short, /// short, - String, /// string - Ubyte, /// ubyte, - Ucent, /// ucent, - Uint, /// uint, - Ulong, /// ulong, - Ushort, /// ushort, - Void, /// void, - Wchar, /// wchar, - WString, /// wstring - TYPES_END, - Template, /// template, - - // Keywords - KEYWORDS_BEGIN, - ATTRIBUTES_BEGIN, - Align, /// align, - Deprecated, /// deprecated, - Extern, /// extern, - Pragma, /// pragma, - PROTECTION_BEGIN, - Export, /// export, - Package, /// package, - Private, /// private, - Protected, /// protected, - Public, /// public, - PROTECTION_END, - Abstract, /// abstract, - AtDisable, /// @disable - Auto, /// auto, - Const, /// const, - Final, /// final - Gshared, /// __gshared, - Immutable, // immutable, - Inout, // inout, - Scope, /// scope, - Shared, // shared, - Static, /// static, - Synchronized, /// synchronized, - ATTRIBUTES_END, - Alias, /// alias, - Asm, /// asm, - Assert, /// assert, - Body, /// body, - Break, /// break, - Case, /// case, - Cast, /// cast, - Catch, /// catch, - Class, /// class, - Continue, /// continue, - Debug, /// debug, - Default, /// default, - Delegate, /// delegate, - Delete, /// delete, - Do, /// do, - Else, /// else, - Enum, /// enum, - False, /// false, - Finally, /// finally, - Foreach, /// foreach, - Foreach_reverse, /// foreach_reverse, - For, /// for, - Goto, /// goto, - If, /// if , - Import, /// import, - In, /// in, - Interface, /// interface, - Invariant, /// invariant, - Is, /// is, - Lazy, /// lazy, - Macro, /// macro, - Mixin, /// mixin, - Module, /// module, - New, /// new, - Nothrow, 
/// nothrow, - Null, /// null, - Out, /// out, - Override, /// override, - Pure, /// pure, - Ref, /// ref, - Return, /// return, - Struct, /// struct, - Super, /// super, - Switch, /// switch , - This, /// this, - Throw, /// throw, - True, /// true, - Try, /// try, - Typedef, /// typedef, - Typeid, /// typeid, - Typeof, /// typeof, - Union, /// union, - Unittest, /// unittest, - Version, /// version, - Volatile, /// volatile, - While, /// while , - With, /// with, - KEYWORDS_END, - -// Constants - CONSTANTS_BEGIN, - File, /// __FILE__, - Line, /// __LINE__, - Thread, /// __thread, - Traits, /// __traits, - CONSTANTS_END, - -// Misc - MISC_BEGIN, - Blank, /// unknown token type - Comment, /// /** comment */ or // comment or ///comment - Identifier, /// anything else - ScriptLine, // Line at the beginning of source file that starts from #! - Whitespace, /// whitespace - NUMBERS_BEGIN, - DoubleLiteral, /// 123.456 - FloatLiteral, /// 123.456f or 0x123_45p-af - IDoubleLiteral, /// 123.456i - IFloatLiteral, /// 123.456fi - IntLiteral, /// 123 or 0b1101010101 - LongLiteral, /// 123L - RealLiteral, /// 123.456L - IRealLiteral, /// 123.456Li - UnsignedIntLiteral, /// 123u - UnsignedLongLiteral, /// 123uL - NUMBERS_END, - STRINGS_BEGIN, - DStringLiteral, /// "32-bit character string"d - StringLiteral, /// "a string" - WStringLiteral, /// "16-bit character string"w - STRINGS_END, - MISC_END, -} - - -/** - * lookup table for converting strings to tokens - */ -immutable TokenType[string] tokenLookup; - - -static this() -{ - tokenLookup = [ - "abstract" : TokenType.Abstract, - "alias" : TokenType.Alias, - "align" : TokenType.Align, - "asm" : TokenType.Asm, - "assert" : TokenType.Assert, - "auto" : TokenType.Auto, - "body" : TokenType.Body, - "bool" : TokenType.Bool, - "break" : TokenType.Break, - "byte" : TokenType.Byte, - "case" : TokenType.Case, - "cast" : TokenType.Cast, - "catch" : TokenType.Catch, - "cdouble" : TokenType.Cdouble, - "cent" : TokenType.Cent, - "cfloat" : 
TokenType.Cfloat, - "char" : TokenType.Char, - "class" : TokenType.Class, - "const" : TokenType.Const, - "continue" : TokenType.Continue, - "creal" : TokenType.Creal, - "dchar" : TokenType.Dchar, - "debug" : TokenType.Debug, - "default" : TokenType.Default, - "delegate" : TokenType.Delegate, - "delete" : TokenType.Delete, - "deprecated" : TokenType.Deprecated, - "do" : TokenType.Do, - "double" : TokenType.Double, - "dstring" : TokenType.DString, - "else" : TokenType.Else, - "enum" : TokenType.Enum, - "export" : TokenType.Export, - "extern" : TokenType.Extern, - "false" : TokenType.False, - "__FILE__" : TokenType.File, - "finally" : TokenType.Finally, - "final" : TokenType.Final, - "float" : TokenType.Float, - "foreach_reverse" : TokenType.Foreach_reverse, - "foreach" : TokenType.Foreach, - "for" : TokenType.For, - "function" : TokenType.Function, - "goto" : TokenType.Goto, - "__gshared" : TokenType.Gshared, - "idouble" : TokenType.Idouble, - "ifloat" : TokenType.Ifloat, - "if" : TokenType.If, - "immutable" : TokenType.Immutable, - "import" : TokenType.Import, - "inout" : TokenType.Inout, - "interface" : TokenType.Interface, - "in" : TokenType.In, - "int" : TokenType.Int, - "invariant" : TokenType.Invariant, - "ireal" : TokenType.Ireal, - "is" : TokenType.Is, - "lazy" : TokenType.Lazy, - "__LINE__" : TokenType.Line, - "long" : TokenType.Long, - "macro" : TokenType.Macro, - "mixin" : TokenType.Mixin, - "module" : TokenType.Module, - "new" : TokenType.New, - "nothrow" : TokenType.Nothrow, - "null" : TokenType.Null, - "out" : TokenType.Out, - "override" : TokenType.Override, - "package" : TokenType.Package, - "pragma" : TokenType.Pragma, - "private" : TokenType.Private, - "protected" : TokenType.Protected, - "public" : TokenType.Public, - "pure" : TokenType.Pure, - "real" : TokenType.Real, - "ref" : TokenType.Ref, - "return" : TokenType.Return, - "scope" : TokenType.Scope, - "shared" : TokenType.Shared, - "short" : TokenType.Short, - "static" : TokenType.Static, - 
"string" : TokenType.String, - "struct" : TokenType.Struct, - "super" : TokenType.Super, - "switch" : TokenType.Switch, - "synchronized" : TokenType.Synchronized, - "template" : TokenType.Template, - "this" : TokenType.This, - "__thread" : TokenType.Thread, - "throw" : TokenType.Throw, - "__traits" : TokenType.Traits, - "true" : TokenType.True, - "try" : TokenType.Try, - "typedef" : TokenType.Typedef, - "typeid" : TokenType.Typeid, - "typeof" : TokenType.Typeof, - "ubyte" : TokenType.Ubyte, - "ucent" : TokenType.Ucent, - "uint" : TokenType.Uint, - "ulong" : TokenType.Ulong, - "union" : TokenType.Union, - "unittest" : TokenType.Unittest, - "ushort" : TokenType.Ushort, - "version" : TokenType.Version, - "void" : TokenType.Void, - "volatile" : TokenType.Volatile, - "wchar" : TokenType.Wchar, - "while" : TokenType.While, - "with" : TokenType.With, - "wstring" : TokenType.WString, - ]; -} - -pure string getTypeFromToken(ref const Token t) +pure string getTypeFromToken(const Token t) { switch (t.type) { @@ -609,56 +73,8 @@ pure string getTypeFromToken(ref const Token t) } } -pure bool isIdentifierOrType(ref const Token t) +pure bool isIdentifierOrType(inout Token t) { return t.type == TokenType.Identifier || (t.type > TokenType.TYPES_BEGIN && TokenType.TYPES_END); } - -/** - * Token structure - */ -struct Token -{ - /// The token type - TokenType type; - - /// The representation of the token in the original source code - string value; - - /// The number of the line the token is on - uint lineNumber; - - /// The character index of the start of the token in the original text - uint startIndex; - - /** - * Check to see if the token is of the same type and has the same string - * representation as the given token - */ - bool opEquals(ref const(Token) other) const - { - return other.type == type && other.value == value; - } - - /** - * Checks to see if the token's string representation is equal to the given - * string - */ - bool opEquals(string range) const { return range == 
value; } - - /** - * Checks to see if the token is of the given type - */ - bool opEquals(TokenType t) const { return type == t; } - - /** - * Comparison operator orders by start index - */ - int opCmp(size_t i) const - { - if (startIndex < i) return -1; - if (startIndex > i) return 1; - return 0; - } -} diff --git a/main.d b/main.d index 39e3488..848cd0a 100644 --- a/main.d +++ b/main.d @@ -15,13 +15,18 @@ import std.parallelism; import std.path; import std.regex; import std.stdio; +import std.d.lexer; + import autocomplete; import highlighter; import langutils; import location; import parser; -import tokenizer; + import types; +import circularbuffer; + +immutable size_t CIRC_BUFF_SIZE = 4; pure bool isLineOfCode(TokenType t) { @@ -100,9 +105,9 @@ int main(string[] args) { string[] importDirs; bool sloc; - bool dotComplete; + /+bool dotComplete;+/ bool json; - bool parenComplete; + /+bool parenComplete;+/ bool highlight; bool ctags; bool recursiveCtags; @@ -111,8 +116,8 @@ int main(string[] args) try { - getopt(args, "I", &importDirs, "dotComplete", &dotComplete, "sloc", &sloc, - "json", &json, "parenComplete", &parenComplete, "highlight", &highlight, + getopt(args, "I", &importDirs,/+ "dotComplete", &dotComplete,+/ "sloc", &sloc, + "json", &json, /+"parenComplete", &parenComplete,+/ "highlight", &highlight, "ctags", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help); } catch (Exception e) @@ -120,7 +125,7 @@ int main(string[] args) stderr.writeln(e.msg); } - if (help || (!sloc && !dotComplete && !json && !parenComplete && !highlight + if (help || (!sloc && /+!dotComplete &&+/ !json /+&& !parenComplete+/ && !highlight && !ctags && !format)) { printHelp(); @@ -166,7 +171,7 @@ int main(string[] args) return 0; } - if (dotComplete || parenComplete) + /+if (dotComplete || parenComplete) { if (isAbsolute(args[1])) importDirs ~= dirName(args[1]); @@ -203,11 +208,11 @@ int main(string[] args) else if (dotComplete) 
writeln(complete.dotComplete(to!size_t(args[1]))); return 0; - } + }+/ if (json) { - Token[] tokens; + CircularBuffer!(Token) tokens; if (args.length == 1) { // Read from stdin @@ -215,46 +220,46 @@ int main(string[] args) char[] buf; while (stdin.readln(buf)) f.put(buf); - tokens = byToken(f.data).array(); + tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data)); } else { // read given file - tokens = byToken(readText(args[1])).array(); + tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1]))); } auto mod = parseModule(tokens); mod.writeJSONTo(stdout); return 0; } - if (ctags) - { - if (!recursiveCtags) - { - auto tokens = byToken(readText(args[1])); - auto mod = parseModule(tokens.array()); - mod.writeCtagsTo(stdout, args[1]); - } - else - { - Module m; - foreach (dirEntry; dirEntries(args[1], SpanMode.breadth)) - { - if (!dirEntry.name.endsWith(".d", ".di")) - continue; - stderr.writeln("Generating tags for ", dirEntry.name); - auto tokens = byToken(readText(dirEntry.name)); - if (m is null) - m = parseModule(tokens.array()); - else - { - auto mod = parseModule(tokens.array()); - m.merge(mod); - } - } - m.writeCtagsTo(stdout, ""); - } - } +// if (ctags) +// { +// if (!recursiveCtags) +// { +// auto tokens = byToken(readText(args[1])); +// auto mod = parseModule(tokens.array()); +// mod.writeCtagsTo(stdout, args[1]); +// } +// else +// { +// Module m; +// foreach (dirEntry; dirEntries(args[1], SpanMode.breadth)) +// { +// if (!dirEntry.name.endsWith(".d", ".di")) +// continue; +// stderr.writeln("Generating tags for ", dirEntry.name); +// auto tokens = byToken(readText(dirEntry.name)); +// if (m is null) +// m = parseModule(tokens.array()); +// else +// { +// auto mod = parseModule(tokens.array()); +// m.merge(mod); +// } +// } +// m.writeCtagsTo(stdout, ""); +// } +// } return 0; } diff --git a/parser.d b/parser.d index 1db9481..70498a8 100644 --- a/parser.d +++ b/parser.d @@ -10,137 +10,163 @@ import 
std.stream; import std.array; import std.stdio; import std.algorithm; +import std.range; +import std.d.lexer; -import types, tokenizer; +import types; import langutils; +import circularbuffer; +alias CircularBuffer!Token TokenBuffer; + +class Balanced : TokenBuffer +{ +public: + + this(InputRange!Token tokens, TokenType open, TokenType close) + { + super(0, tokens); + this.open = open; + this.close = close; + } + + override bool empty() @property + { + return _empty; + } + + override Token front() const @property + { + return range.front; + } + + override void popFront() + { + range.popFront(); + if (range.front == open) + ++depth; + else if (range.front == close) + --depth; + _empty = depth == 0; + } + +private: + int depth; + TokenType open; + TokenType close; + TokenBuffer range; + bool _empty; +} /** * Params: * tokens = the array of tokens - * index = an index into tokens such that tokens[index].type == open + * index = an index into tokens such that tokens.front.type == open * open = the opening delimiter * close = the closing delimiter * Returns: all tokens that are between the balanced delimiters that start at - * tokens[index], not including the delimiters. If the delimiters in tokens + * tokens.front, not including the delimiters. If the delimiters in tokens * are not balanced, this function will return tokens[index + 1 .. 
$]; */ -const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open, - TokenType close) +Balanced betweenBalanced(TokenBuffer tokens, + TokenType open, TokenType close) in { - assert (tokens[index] == open); + assert (tokens.front == open); +} +body +{ + return new Balanced(tokens, open, close); +} + + +/** + * See_also: betweenBalanced + */ +Balanced betweenBalancedBraces(TokenBuffer tokens) +{ + return betweenBalanced(tokens, TokenType.LBrace, TokenType.RBrace); +} + + +/** + * See_also: betweenBalanced + */ +Balanced betweenBalancedParens(TokenBuffer tokens) +{ + return betweenBalanced(tokens, TokenType.LParen, TokenType.RParen); +} + + +/** + * See_also: betweenBalanced + */ +Balanced betweenBalancedBrackets(TokenBuffer tokens) +{ + return betweenBalanced(tokens, TokenType.LBracket, TokenType.RBracket); +} + +void skipBalanced(alias openToken, alias closeToken)(TokenBuffer tokens) +in +{ + assert (tokens.front == openToken); } body { - ++index; - size_t start = index; int depth = 1; - while (depth > 0 && index < tokens.length) + tokens.popFront(); + while (!tokens.empty && depth != 0) { - if (tokens[index] == open) ++depth; - else if (tokens[index] == close) --depth; - ++index; - } - return tokens[start .. 
index - 1]; -} - - -/** - * See_also: betweenBalanced - */ -const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index) -{ - return betweenBalanced(tokens, index, TokenType.LBrace, TokenType.RBrace); -} - - -/** - * See_also: betweenBalanced - */ -const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index) -{ - return betweenBalanced(tokens, index, TokenType.LParen, TokenType.RParen); -} - - -/** - * See_also: betweenBalanced - */ -const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index) -{ - return betweenBalanced(tokens, index, TokenType.LBracket, TokenType.RBracket); -} - - -/** - * If tokens[index] is currently openToken, advances index until it refers to a - * location in tokens directly after the balanced occurance of closeToken. If - * tokens[index] is closeToken, decrements index - * - */ -void skipBalanced(alias openToken, alias closeToken)(const Token[] tokens, ref size_t index) -{ - int depth = tokens[index] == openToken ? 
1 : -1; - int deltaIndex = depth; - index += deltaIndex; - for (; index < tokens.length && index > 0 && depth != 0; index += deltaIndex) - { - switch (tokens[index].type) + switch (tokens.front.type) { case openToken: ++depth; break; case closeToken: --depth; break; default: break; } + tokens.popFront(); } } -void skipParens(const Token[] tokens, ref size_t index) +void skipParens(TokenBuffer tokens) { - skipBalanced!(TokenType.LParen, TokenType.RParen)(tokens, index); + skipBalanced!(TokenType.LParen, TokenType.RParen)(tokens); } -void skipBrackets(const Token[] tokens, ref size_t index) +void skipBrackets(TokenBuffer tokens) { - skipBalanced!(TokenType.LBracket, TokenType.RBracket)(tokens, index); + skipBalanced!(TokenType.LBracket, TokenType.RBracket)(tokens); } -void skipBraces(const Token[] tokens, ref size_t index) +void skipBraces(TokenBuffer tokens) { - skipBalanced!(TokenType.LBrace, TokenType.RBrace)(tokens, index); + skipBalanced!(TokenType.LBrace, TokenType.RBrace)(tokens); } /** * Params: * tokens = the token array to examine - * index = an indext into tokens such that tokens[index].type == open + * index = an indext into tokens such that tokens.front.type == open * open = the opening delimiter * close = the closing delimiter * Returns: a string representing the contents of the two delimiters. This will * not preserve whitespace, but it will place a single space character after * a comma and between identifiers. 
*/ -string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close) +string content(TokenBuffer tokens, TokenType open, TokenType close) in { - assert (tokens[index] == open); + assert (tokens.front == open); } body { - index++; auto app = appender!string(); int depth = 1; - while (depth > 0 && index < tokens.length) + foreach (t; betweenBalanced(tokens, open, close)) { - if (tokens[index] == open) ++depth; - else if (tokens[index] == close) --depth; - else if (tokens[index] == TokenType.Comma) - { + if (t == TokenType.Comma) app.put(", "); - } else - app.put(tokens[index].value); - ++index; + app.put(t.value); } return app.data; } @@ -149,18 +175,18 @@ body /** * See_also: content */ -string parenContent(const Token[]tokens, ref size_t index) +string parenContent(TokenBuffer tokens) { - return "(" ~ content(tokens, index, TokenType.LParen, TokenType.RParen) ~ ")"; + return "(" ~ content(tokens, TokenType.LParen, TokenType.RParen) ~ ")"; } /** * See_also: content */ -string bracketContent(const Token[]tokens, ref size_t index) +string bracketContent(TokenBuffer tokens) { - return "[" ~ content(tokens, index, TokenType.LBracket, TokenType.RBracket) ~ "]"; + return "[" ~ content(tokens, TokenType.LBracket, TokenType.RBracket) ~ "]"; } @@ -169,14 +195,12 @@ string bracketContent(const Token[]tokens, ref size_t index) * index initially indexed a right brace, or advances index until it indexes a * character after a simicolon otherwise. */ -void skipBlockStatement(const Token[] tokens, ref size_t index) +void skipBlockStatement(TokenBuffer tokens) { - if (tokens[index] == TokenType.LBrace) - betweenBalancedBraces(tokens, index); + if (tokens.front == TokenType.LBrace) + skipBraces(tokens); else - { - skipPastNext(tokens, TokenType.Semicolon, index); - } + skipPastNext(tokens, TokenType.Semicolon); } @@ -185,50 +209,57 @@ void skipBlockStatement(const Token[] tokens, ref size_t index) * of type type. 
This function handles nesting of braces, brackets, and * parenthesis */ -void skipPastNext(const Token[] tokens, TokenType type, ref size_t index) +void skipPastNext(TokenBuffer tokens, TokenType type) { - while (index < tokens.length) + while (!tokens.empty) { - if (tokens[index].type == TokenType.LBrace) - betweenBalancedBraces(tokens, index); - else if (tokens[index].type == TokenType.LParen) - betweenBalancedParens(tokens, index); - else if (tokens[index].type == TokenType.LBracket) - betweenBalancedBrackets(tokens, index); - else if (tokens[index].type == type) + if (tokens.front.type == TokenType.LBrace) + skipBraces(tokens); + else if (tokens.front.type == TokenType.LParen) + skipParens(tokens); + else if (tokens.front.type == TokenType.LBracket) + skipBrackets(tokens); + else if (tokens.front.type == type) { - ++index; + tokens.popFront(); return; } else - ++index; + tokens.popFront(); } } -string parseTypeDeclaration(const Token[] tokens, ref size_t index) +string parseTypeDeclaration(TokenBuffer tokens) { - auto type = tokens[index++].value.idup; - buildingType: while (index < tokens.length) + auto type = tokens.front.value; + tokens.popFront(); + buildingType: while (!tokens.empty) { - switch (tokens[index].type) + switch (tokens.front.type) { case TokenType.LBracket: - type ~= bracketContent(tokens, index); + type ~= bracketContent(tokens); break; case TokenType.Not: - type ~= tokens[index++].value; - if (tokens[index] == TokenType.LParen) - type ~= parenContent(tokens, index); + type ~= tokens.front.value; + tokens.popFront(); + if (tokens.front == TokenType.LParen) + type ~= parenContent(tokens); else - type ~= tokens[index++].value; + { + type ~= tokens.front.value; + tokens.popFront(); + } break; case TokenType.Star: case TokenType.BitAnd: - type ~= tokens[index++].value; + type ~= tokens.front.value; + tokens.popFront(); break; case TokenType.Function: - type ~= " " ~ tokens[index++].value; - type ~= parenContent(tokens, index); + type ~= " " ~ 
tokens.front.value; + tokens.popFront(); + type ~= parenContent(tokens); break; default: break buildingType; @@ -244,7 +275,7 @@ string parseTypeDeclaration(const Token[] tokens, ref size_t index) * attributes = the default attributes for a block statement * Returns: the parsed module */ -Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = []) +Module parseModule(TokenBuffer tokens, string protection = "public", string[] attributes = []) { string type; string name; @@ -261,42 +292,45 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] Module mod = new Module; size_t index = 0; - while(index < tokens.length) + while(!tokens.empty) { - switch(tokens[index].type) + switch(tokens.front.type) { - case TokenType.Pragma: - ++index; - if (tokens[index] == TokenType.LParen) - skipParens(tokens, index); - break; + case TokenType.Pragma: + tokens.popFront(); + if (tokens.front == TokenType.LParen) + skipParens(tokens); + break; case TokenType.Mixin: case TokenType.Assert: - ++index; - tokens.skipBlockStatement(index); + tokens.popFront(); + tokens.skipBlockStatement(); break; case TokenType.Alias: - Alias a = parseAlias(tokens, index, + Alias a = parseAlias(tokens, localProtection.empty() ? protection : localProtection, attributes); mod.aliases ~= a; break; case TokenType.Import: - mod.imports ~= parseImports(tokens, index); + mod.imports ~= parseImports(tokens); resetLocals(); break; case TokenType.Version: - ++index; - if (tokens[index] == TokenType.LParen) + tokens.popFront(); + if (tokens.front == TokenType.LParen) { - tokens.betweenBalancedParens(index); - if (tokens[index] == TokenType.LBrace) - mod.merge(parseModule(betweenBalancedBraces(tokens, index), + tokens.betweenBalancedParens(); + if (tokens.front == TokenType.LBrace) + { + auto braceContent = tokens.betweenBalancedBraces(); + mod.merge(parseModule(braceContent, localProtection.empty() ? 
protection : localProtection, attributes)); + } } - else if (tokens[index] == TokenType.Assign) - tokens.skipBlockStatement(index); + else if (tokens.front == TokenType.Assign) + tokens.skipBlockStatement(); break; case TokenType.Deprecated: case TokenType.Nothrow: @@ -306,7 +340,8 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] case TokenType.Final: case TokenType.Gshared: case TokenType.Static: - localAttributes ~= tokens[index++].value; + localAttributes ~= tokens.front.value; + tokens.popFront(); break; case TokenType.Const: case TokenType.Immutable: @@ -314,10 +349,11 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] case TokenType.Pure: case TokenType.Scope: case TokenType.Shared: - auto tmp = tokens[index++].value; - if (tokens[index] == TokenType.LParen) - type = tmp ~ parenContent(tokens, index); - else if (tokens[index] == TokenType.Colon) + auto tmp = tokens.front.value; + tokens.popFront(); + if (tokens.front == TokenType.LParen) + type = tmp ~ tokens.parenContent(); + else if (tokens.front == TokenType.Colon) { index++; attributes ~= tmp; @@ -326,78 +362,83 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] break; case TokenType.Align: case TokenType.Extern: - string attribute = tokens[index++].value; - if (tokens[index] == TokenType.LParen) - attribute ~= parenContent(tokens, index); - if (tokens[index] == TokenType.LBrace) - mod.merge(parseModule(betweenBalancedBraces(tokens, index), + string attribute = tokens.front.value; + tokens.popFront(); + if (tokens.front == TokenType.LParen) + attribute ~= parenContent(tokens); + if (tokens.front == TokenType.LBrace) + mod.merge(parseModule(tokens.betweenBalancedBraces(), localProtection.empty() ? 
protection : localProtection, attributes ~ attribute)); - else if (tokens[index] == TokenType.Colon) + else if (tokens.front == TokenType.Colon) { - ++index; + tokens.popFront(); attributes ~= attribute; } else localAttributes ~= attribute; break; case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END: - string p = tokens[index++].value; - if (tokens[index] == TokenType.Colon) + string p = tokens.front.value; + tokens.popFront(); + if (tokens.front == TokenType.Colon) { protection = p; - ++index; + tokens.popFront(); } - else if (tokens[index] == TokenType.LBrace) - mod.merge(parseModule(betweenBalancedBraces(tokens, index), + else if (tokens.front == TokenType.LBrace) + mod.merge(parseModule(tokens.betweenBalancedBraces(), p, attributes ~ localAttributes)); else localProtection = p; break; case TokenType.Module: - ++index; - while (index < tokens.length && tokens[index] != TokenType.Semicolon) - mod.name ~= tokens[index++].value; - ++index; + tokens.popFront(); + while (!tokens.empty && tokens.front != TokenType.Semicolon) + { + mod.name ~= tokens.front.value; + tokens.popFront(); + } + tokens.popFront(); resetLocals(); break; case TokenType.Union: - mod.unions ~= parseUnion(tokens, index, + mod.unions ~= parseUnion(tokens, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; case TokenType.Class: - mod.classes ~= parseClass(tokens, index, + mod.classes ~= parseClass(tokens, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; case TokenType.Interface: - mod.interfaces ~= parseInterface(tokens, index, + mod.interfaces ~= parseInterface(tokens, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; case TokenType.Struct: - mod.structs ~= parseStruct(tokens, index, + mod.structs ~= parseStruct(tokens, localProtection.empty() ? 
protection : localProtection, localAttributes ~ attributes); resetLocals(); break; case TokenType.Enum: - mod.enums ~= parseEnum(tokens, index, + mod.enums ~= parseEnum(tokens, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); resetLocals(); break; case TokenType.Template: - ++index; // template - ++index; // name - if (tokens[index] == TokenType.LParen) - tokens.betweenBalancedParens(index); // params - if (tokens[index] == TokenType.LBrace) - tokens.betweenBalancedBraces(index); // body + tokens.popFront(); // template + tokens.popFront(); // name + if (tokens.front == TokenType.LParen) + tokens.betweenBalancedParens(); // params + if (tokens.front == TokenType.LBrace) + tokens.betweenBalancedBraces(); // body resetLocals(); break; case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END: @@ -405,16 +446,17 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] case TokenType.Identifier: if (type.empty()) { - type = tokens.parseTypeDeclaration(index); + type = tokens.parseTypeDeclaration(); } else { - name = tokens[index++].value; - if (index >= tokens.length) break; - if (tokens[index] == TokenType.LParen) + name = tokens.front.value; + tokens.popFront(); + if (tokens.empty) break; + if (tokens.front == TokenType.LParen) { - mod.functions ~= parseFunction(tokens, index, type, name, - tokens[index].lineNumber, + mod.functions ~= parseFunction(tokens, type, name, + tokens.front.lineNumber, localProtection.empty() ? protection : localProtection, attributes ~ localAttributes); } @@ -425,39 +467,40 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] v.type = type; v.attributes = localAttributes ~ attributes; v.protection = localProtection.empty() ? 
protection : localProtection; - v.line = tokens[index].lineNumber; + v.line = tokens.front.lineNumber; mod.variables ~= v; } resetLocals(); } break; case TokenType.Unittest: - ++index; - if (!tokens.empty() && tokens[index] == TokenType.LBrace) - tokens.skipBlockStatement(index); + tokens.popFront(); + if (!tokens.empty() && tokens.front == TokenType.LBrace) + tokens.skipBlockStatement(); resetLocals(); break; case TokenType.Tilde: - ++index; - if (tokens[index] == TokenType.This) + tokens.popFront(); + if (tokens.front == TokenType.This) { name = "~"; goto case; } break; case TokenType.This: - name ~= tokens[index++].value; - if (index < tokens.length && tokens[index] == TokenType.LParen) + name ~= tokens.front.value; + tokens.popFront(); + if (!tokens.empty && tokens.front == TokenType.LParen) { - mod.functions ~= parseFunction(tokens, index, "", name, - tokens[index - 1].lineNumber, + mod.functions ~= parseFunction(tokens, "", name, + tokens.peek(-1).lineNumber, localProtection.empty() ? protection : localProtection, localAttributes ~ attributes); } resetLocals(); break; default: - ++index; + tokens.popFront(); break; } } @@ -470,32 +513,33 @@ Module parseModule(const Token[] tokens, string protection = "public", string[] * Returns: only the module names that were imported, not which symbols were * selectively improted. 
*/ -string[] parseImports(const Token[] tokens, ref size_t index) +string[] parseImports(TokenBuffer tokens) { - assert(tokens[index] == TokenType.Import); - ++index; + assert(tokens.front == TokenType.Import); + tokens.popFront(); auto app = appender!(string[])(); string im; - while (index < tokens.length) + while (!tokens.empty) { - switch(tokens[index].type) + switch(tokens.front.type) { case TokenType.Comma: - ++index; + tokens.popFront(); app.put(im); im = ""; break; case TokenType.Assign: case TokenType.Semicolon: app.put(im); - ++index; + tokens.popFront(); return app.data; case TokenType.Colon: app.put(im); - tokens.skipBlockStatement(index); + tokens.skipBlockStatement(); return app.data; default: - im ~= tokens[index++].value; + im ~= tokens.front.value; + tokens.popFront(); break; } } @@ -506,92 +550,98 @@ string[] parseImports(const Token[] tokens, ref size_t index) /** * Parses an enum declaration */ -Enum parseEnum(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Enum parseEnum(TokenBuffer tokens, string protection, string[] attributes) in { - assert (tokens[index] == TokenType.Enum); + assert (tokens.front == TokenType.Enum); } body { Enum e = new Enum; - e.line = tokens[index].lineNumber; - ++index; + e.line = tokens.front.lineNumber; + tokens.popFront(); string enumType; e.protection = protection; - if (tokens[index] == TokenType.LBrace) + if (tokens.front == TokenType.LBrace) goto enumBody; - if (isIdentifierOrType(tokens[index])) + if (isIdentifierOrType(tokens.front)) { - if (index + 1 < tokens.length && tokens[index + 1] == TokenType.Identifier) + if (tokens.canPeek() && tokens.peek() == TokenType.Identifier) { // enum long l = 4; EnumMember m; - m.type = tokens[index++].value; - m.line = tokens[index].lineNumber; - e.name = m.name = tokens[index].value; + m.type = tokens.front.value; + tokens.popFront(); + m.line = tokens.front.lineNumber; + e.name = m.name = tokens.front.value; e.members ~= m; - 
skipBlockStatement(tokens, index); + tokens.skipBlockStatement(); return e; } - else if (index + 1 < tokens.length && tokens[index + 1] == TokenType.Assign) + else if (tokens.canPeek() && tokens.peek() == TokenType.Assign) { // enum m = "abcd"; - e.name = tokens[index].value; + e.name = tokens.front.value; EnumMember m; m.name = e.name; - m.line = tokens[index].lineNumber; - m.type = getTypeFromToken(tokens[index + 2]); + m.line = tokens.front.lineNumber; + m.type = getTypeFromToken(tokens.peek(2)); e.members ~= m; - skipBlockStatement(tokens, index); + tokens.skipBlockStatement(); return e; } } - if (isIdentifierOrType(tokens[index])) - e.name = tokens[index++].value; - - if (tokens[index] == TokenType.Colon) + if (isIdentifierOrType(tokens.front)) { - index++; - if (!isIdentifierOrType(tokens[index])) - skipBlockStatement(tokens, index); + e.name = tokens.front.value; + tokens.popFront(); + } + + if (tokens.front == TokenType.Colon) + { + tokens.popFront(); + if (!isIdentifierOrType(tokens.front)) + tokens.skipBlockStatement(); else - enumType = tokens[index++].value; + { + enumType = tokens.front.value; + tokens.popFront(); + } } enumBody: - - auto r = betweenBalancedBraces(tokens, index); - for (size_t i = 0; i < r.length;) - { - EnumMember m; - if (isIdentifierOrType(r[i]) && i + 1 < r.length && isIdentifierOrType(r[i + 1])) - { - m.line = r[i + 1].lineNumber; - m.name = r[i + 1].value; - m.type = r[i].value; - } - else if (isIdentifierOrType(r[i]) && i + 1 < r.length && r[i + 1] == TokenType.Assign) - { - if (enumType == null && i + 2 < r.length) - m.type = getTypeFromToken(r[i + 2]); - else - m.type = enumType; - m.line = r[i].lineNumber; - m.name = r[i].value; - } - else - { - m.line = r[i].lineNumber; - m.name = r[i].value; - m.type = enumType == null ? 
"int" : enumType; - } - e.members ~= m; - skipPastNext(r, TokenType.Comma, i); - } +// +// auto r = tokens.betweenBalancedBraces(); +// while (!r.empty) +// { +// EnumMember m; +// if (isIdentifierOrType(r.front) && i + 1 < r.length && isIdentifierOrType(r[i + 1])) +// { +// m.line = r[i + 1].lineNumber; +// m.name = r[i + 1].value; +// m.type = r.front.value; +// } +// else if (isIdentifierOrType(r.front) && i + 1 < r.length && r[i + 1] == TokenType.Assign) +// { +// if (enumType == null && i + 2 < r.length) +// m.type = getTypeFromToken(r[i + 2]); +// else +// m.type = enumType; +// m.line = r.front.lineNumber; +// m.name = r.front.value; +// } +// else +// { +// m.line = r.front.lineNumber; +// m.name = r.front.value; +// m.type = enumType == null ? "int" : enumType; +// } +// e.members ~= m; +// skipPastNext(r, TokenType.Comma, i); +// } return e; } @@ -599,11 +649,11 @@ enumBody: /** * Parses a function declaration */ -Function parseFunction(const Token[] tokens, ref size_t index, string type, +Function parseFunction(TokenBuffer tokens, string type, string name, uint line, string protection, string[] attributes) in { - assert (tokens[index] == TokenType.LParen); + assert (tokens.front == TokenType.LParen); } body { @@ -613,20 +663,20 @@ body f.line = line; f.attributes.insertInPlace(f.attributes.length, attributes); - Variable[] vars1 = parseParameters(tokens, index); - if (index < tokens.length && tokens[index] == TokenType.LParen) + Variable[] vars1 = parseParameters(tokens); + if (!tokens.empty && tokens.front == TokenType.LParen) { f.templateParameters.insertInPlace(f.templateParameters.length, map!("a.type")(vars1)); f.parameters.insertInPlace(f.parameters.length, - parseParameters(tokens, index)); + parseParameters(tokens)); } else f.parameters.insertInPlace(f.parameters.length, vars1); - attributeLoop: while(index < tokens.length) + attributeLoop: while(!tokens.empty) { - switch (tokens[index].type) + switch (tokens.front.type) { case 
TokenType.Immutable: case TokenType.Const: @@ -634,62 +684,64 @@ body case TokenType.Nothrow: case TokenType.Final: case TokenType.Override: - f.attributes ~= tokens[index++].value; + f.attributes ~= tokens.front.value; + tokens.popFront(); break; default: break attributeLoop; } } - if (index < tokens.length && tokens[index] == TokenType.If) - f.constraint = parseConstraint(tokens, index); + if (!tokens.empty && tokens.front == TokenType.If) + f.constraint = parseConstraint(tokens); - while (index < tokens.length && - (tokens[index] == TokenType.In || tokens[index] == TokenType.Out - || tokens[index] == TokenType.Body)) + while (!tokens.empty && + (tokens.front == TokenType.In || tokens.front == TokenType.Out + || tokens.front == TokenType.Body)) { - ++index; - if (index < tokens.length && tokens[index] == TokenType.LParen - && tokens[index - 1] == TokenType.Out) + tokens.popFront(); + if (!tokens.empty && tokens.front == TokenType.LParen + && tokens.peek(-1) == TokenType.Out) { - tokens.skipParens(index); + tokens.skipParens(); } - if (index < tokens.length && tokens[index] == TokenType.LBrace) - tokens.skipBlockStatement(index); + if (!tokens.empty && tokens.front == TokenType.LBrace) + tokens.skipBlockStatement(); } - if (index >= tokens.length) + if (tokens.empty) return f; - if (tokens[index] == TokenType.LBrace) - tokens.skipBlockStatement(index); - else if (tokens[index] == TokenType.Semicolon) - ++index; + if (tokens.front == TokenType.LBrace) + tokens.skipBlockStatement(); + else if (tokens.front == TokenType.Semicolon) + tokens.popFront(); return f; } -string parseConstraint(const Token[] tokens, ref size_t index) +string parseConstraint(TokenBuffer tokens) { auto appender = appender!(string)(); - assert(tokens[index] == TokenType.If); - appender.put(tokens[index++].value); - assert(tokens[index] == TokenType.LParen); - return "if " ~ parenContent(tokens, index); + appender.put(tokens.front.value); + 
tokens.popFront(); + assert(tokens.front == TokenType.LParen); + return "if " ~ tokens.parenContent(); } -Variable[] parseParameters(const Token[] tokens, ref size_t index) +Variable[] parseParameters(TokenBuffer tokens) in { - assert (tokens[index] == TokenType.LParen); + assert (tokens.front == TokenType.LParen); } body { auto appender = appender!(Variable[])(); Variable v = new Variable; - auto r = betweenBalancedParens(tokens, index); + auto r = betweenBalancedParens(tokens); size_t i = 0; - while (i < r.length) + while (!r.empty) { - switch(r[i].type) + switch(r.front.type) { case TokenType.Alias: case TokenType.In: @@ -701,15 +753,16 @@ body case TokenType.Immutable: case TokenType.Shared: case TokenType.Inout: - auto tmp = r[i++].value; - if (r[i] == TokenType.LParen) - v.type ~= tmp ~ parenContent(r, i); + auto tmp = r.front.value; + r.popFront(); + if (r.front == TokenType.LParen) + v.type ~= tmp ~ parenContent(r); else v.attributes ~= tmp; break; case TokenType.Colon: i++; - r.skipPastNext(TokenType.Comma, i); + r.skipPastNext(TokenType.Comma); appender.put(v); v = new Variable; break; @@ -721,21 +774,22 @@ body default: if (v.type.empty()) { - v.type = r.parseTypeDeclaration(i); - if (i >= r.length) + v.type = r.parseTypeDeclaration(); + if (r.empty) appender.put(v); } else { - v.line = r[i].lineNumber; - v.name = r[i++].value; + v.line = r.front.lineNumber; + v.name = r.front.value; + r.popFront(); appender.put(v); - if (i < r.length && r[i] == TokenType.Vararg) + if (!r.empty && r.front == TokenType.Vararg) { v.type ~= " ..."; } v = new Variable; - r.skipPastNext(TokenType.Comma, i); + r.skipPastNext(TokenType.Comma); } break; } @@ -743,23 +797,23 @@ body return appender.data; } -string[] parseBaseClassList(const Token[] tokens, ref size_t index) +string[] parseBaseClassList(TokenBuffer tokens) in { - assert(tokens[index] == TokenType.Colon); + assert(tokens.front == TokenType.Colon); } body { auto appender = appender!(string[])(); - ++index; - while 
(index < tokens.length) + tokens.popFront(); + while (!tokens.empty) { - if (tokens[index] == TokenType.Identifier) + if (tokens.front == TokenType.Identifier) { - string base = parseTypeDeclaration(tokens, index); + string base = parseTypeDeclaration(tokens); appender.put(base); - if (tokens[index] == TokenType.Comma) - ++index; + if (tokens.front == TokenType.Comma) + tokens.popFront(); else break; } @@ -769,117 +823,118 @@ body return appender.data; } -void parseStructBody(const Token[] tokens, ref size_t index, Struct st) +void parseStructBody(TokenBuffer tokens, Struct st) { - st.bodyStart = tokens[index].startIndex; - Module m = parseModule(betweenBalancedBraces(tokens, index)); - st.bodyEnd = tokens[index - 1].startIndex; + st.bodyStart = tokens.front.startIndex; + Module m = parseModule(tokens.betweenBalancedBraces()); + st.bodyEnd = tokens.peek(-1).startIndex; st.functions.insertInPlace(0, m.functions); st.variables.insertInPlace(0, m.variables); st.aliases.insertInPlace(0, m.aliases); } -Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection, +Struct parseStructOrUnion(TokenBuffer tokens, string protection, string[] attributes) { Struct s = new Struct; - s.line = tokens[index].lineNumber; + s.line = tokens.front.lineNumber; s.attributes = attributes; s.protection = protection; - s.name = tokens[index++].value; - if (tokens[index] == TokenType.LParen) + s.name = tokens.front.value; + tokens.popFront(); + if (tokens.front == TokenType.LParen) s.templateParameters.insertInPlace(s.templateParameters.length, - map!("a.type")(parseParameters(tokens, index))); + map!("a.type")(parseParameters(tokens))); - if (index >= tokens.length) return s; + if (tokens.empty) return s; - if (tokens[index] == TokenType.If) - s.constraint = parseConstraint(tokens, index); + if (tokens.front == TokenType.If) + s.constraint = parseConstraint(tokens); - if (index >= tokens.length) return s; + if (tokens.empty) return s; - if (tokens[index] == 
TokenType.LBrace) - parseStructBody(tokens, index, s); + parseStructBody(tokens, s); else - tokens.skipBlockStatement(index); + tokens.skipBlockStatement(); return s; } -Struct parseStruct(const Token[] tokens, ref size_t index, string protection, +Struct parseStruct(TokenBuffer tokens, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.Struct); + assert(tokens.front == TokenType.Struct); } body { - return parseStructOrUnion(tokens, ++index, protection, attributes); + tokens.popFront(); return parseStructOrUnion(tokens, protection, attributes); } -Struct parseUnion(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Struct parseUnion(TokenBuffer tokens, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.Union); + assert(tokens.front == TokenType.Union); } body { - return parseStructOrUnion(tokens, ++index, protection, attributes); + tokens.popFront(); + return parseStructOrUnion(tokens, protection, attributes); } -Inherits parseInherits(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Inherits parseInherits(TokenBuffer tokens, string protection, string[] attributes) { auto i = new Inherits; - i.line = tokens[index].lineNumber; - i.name = tokens[index++].value; + i.line = tokens.front.lineNumber; + i.name = tokens.front.value; + tokens.popFront(); i.protection = protection; i.attributes.insertInPlace(i.attributes.length, attributes); - if (tokens[index] == TokenType.LParen) + if (tokens.front == TokenType.LParen) i.templateParameters.insertInPlace(i.templateParameters.length, - map!("a.type")(parseParameters(tokens, index))); + map!("a.type")(parseParameters(tokens))); - if (index >= tokens.length) return i; + if (tokens.empty) return i; - if (tokens[index] == TokenType.If) - i.constraint = parseConstraint(tokens, index); + if (tokens.front == TokenType.If) + i.constraint = parseConstraint(tokens); - if (index >= 
tokens.length) return i; + if (tokens.empty) return i; - if (tokens[index] == TokenType.Colon) - i.baseClasses = parseBaseClassList(tokens, index); + if (tokens.front == TokenType.Colon) + i.baseClasses = parseBaseClassList(tokens); - if (index >= tokens.length) return i; + if (tokens.empty) return i; - if (tokens[index] == TokenType.LBrace) - parseStructBody(tokens, index, i); + if (tokens.front == TokenType.LBrace) + parseStructBody(tokens, i); else - tokens.skipBlockStatement(index); + tokens.skipBlockStatement(); return i; } -Inherits parseInterface(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Inherits parseInterface(TokenBuffer tokens, string protection, string[] attributes) in { - assert (tokens[index] == TokenType.Interface); + assert (tokens.front == TokenType.Interface); } body { - return parseInherits(tokens, ++index, protection, attributes); + tokens.popFront(); + return parseInherits(tokens, protection, attributes); } -Inherits parseClass(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Inherits parseClass(TokenBuffer tokens, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.Class); + assert(tokens.front == TokenType.Class); } body { - return parseInherits(tokens, ++index, protection, attributes); + tokens.popFront(); + return parseInherits(tokens, protection, attributes); } @@ -889,24 +944,23 @@ body * but there seems to be no example of this being used, nor has the compiler * accepted any of my attempts to create one. 
Therefore, it's not supported here */ -Alias parseAlias(const Token[] tokens, ref size_t index, string protection, - string[] attributes) +Alias parseAlias(TokenBuffer tokens, string protection, string[] attributes) in { - assert(tokens[index] == TokenType.Alias); + assert(tokens.front == TokenType.Alias); } body { - index++; + tokens.popFront(); Alias a = new Alias; - a.aliasedType = parseTypeDeclaration(tokens, index); + a.aliasedType = parseTypeDeclaration(tokens); a.attributes = attributes; a.protection = protection; - if (tokens[index] == TokenType.Identifier) + if (tokens.front == TokenType.Identifier) { - a.name = tokens[index].value; - a.line = tokens[index].lineNumber; - skipBlockStatement(tokens, index); + a.name = tokens.front.value; + a.line = tokens.front.lineNumber; + skipBlockStatement(tokens); } else return null; diff --git a/entities.d b/std/d/entities.d similarity index 99% rename from entities.d rename to std/d/entities.d index 8b276cb..c4e8bc3 100644 --- a/entities.d +++ b/std/d/entities.d @@ -1,7 +1,15 @@ -// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt)module entities; +// Written in the D programming language + +/** + * Contains listing of named entities for the D lexer. + * + * Copyright: Brian Schott 2013 + * License: Boost License 1.0. + * Authors: Brian Schott + * Source: $(PHOBOSSRC std/d/_lexer.d) + */ + +module std.d.entities; /** * Generated from $(LINK http://www.w3.org/TR/html5/entities.json) diff --git a/tokenizer.d b/std/d/lexer.d similarity index 56% rename from tokenizer.d rename to std/d/lexer.d index 3dea8ad..4793911 100644 --- a/tokenizer.d +++ b/std/d/lexer.d @@ -1,26 +1,658 @@ -// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) +// Written in the D programming language -module tokenizer; +/** + * This module contains a range-based lexer for the D programming language. + * + * Copyright: Brian Schott 2013 + * License: Boost License 1.0. + * Authors: Brian Schott + * Source: $(PHOBOSSRC std/d/_lexer.d) + */ + +module std.d.lexer; import std.range; -import std.file; import std.traits; import std.algorithm; import std.conv; import std.uni; -import std.stdio; import std.ascii; -import std.format; import std.exception; +import std.d.entities; -import langutils; -import codegen; -import entities; +public: -pure bool isNewline(R)(R range) +/** + * Represents a D token + */ +struct Token +{ + /// The token type. + TokenType type; + + /// The representation of the token in the original source code. + string value; + + /// The number of the line the token is on. + uint lineNumber; + + /// The character index of the start of the token in the original text. + uint startIndex; + + /** + * Check to see if the token is of the same type and has the same string + * representation as the given token. + */ + bool opEquals(ref const(Token) other) const + { + return other.type == type && other.value == value; + } + + /** + * Checks to see if the token's string representation is equal to the given + * string. + */ + bool opEquals(string value) const { return this.value == value; } + + /** + * Checks to see if the token is of the given type. + */ + bool opEquals(TokenType type) const { return this.type == type; } + + /** + * Comparison operator orders tokens by start index. 
+ */ + int opCmp(size_t i) const + { + if (startIndex < i) return -1; + if (startIndex > i) return 1; + return 0; + } +} + +/** + * Configure the behavior of the byToken() function + */ +enum IterationStyle +{ + /// Only include code, not whitespace or comments + CodeOnly = 0, + /// Includes comments + IncludeComments = 0b01, + /// Includes whitespace + IncludeWhitespace = 0b10, + /// Include everything + Everything = IncludeComments | IncludeWhitespace +} + +/** + * Configuration of the string lexing style + */ +enum StringStyle : uint +{ + /** + * Escape sequences will be replaced with their equivalent characters, + * enclosing quote characters will not be included. Useful for creating a + * compiler or interpreter. + */ + Default = 0b0000, + + /** + * Escape sequences will not be processed. An escaped quote character will + * not terminate string lexing, but it will not be replaced with the quote + * character in the token. + */ + NotEscaped = 0b0001, + + /** + * Strings will include their opening and closing quote characters as well + * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will + * include the $(D_STRING 'w') character as well as the opening and closing + * quotes$(RPAREN) + */ + IncludeQuotes = 0x0010, + + /** + * Strings will be read exactly as they appeared in the source, including + * their opening and closing quote characters. Useful for syntax + * highlighting. + */ + Source = NotEscaped | IncludeQuotes, +} + +/** + * Iterate over the given range of characters by D tokens. 
+ * Params: + * range = the range of characters + * iterationStyle = See IterationStyle + * stringStyle = see StringStyle + * Returns: + * an input range of tokens + */ +TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly, + const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R))) +{ + auto r = new TokenRange!(R)(range); + r.stringStyle = stringStyle; + r.iterStyle = iterationStyle; + r.lineNumber = 1; + r.popFront(); + return r; +} + +/** + * Range of tokens + */ +class TokenRange(R) : InputRange!(Token) +{ + this(ref R range) + { + this.range = range; + } + + /** + * Returns: true if the range is empty + */ + override bool empty() const @property + { + return _empty; + } + + /** + * Returns: the current token + */ + override Token front() const @property + { + enforce(!_empty, "Cannot call front() on empty token range"); + return current; + } + + /** + * Returns the current token and then removes it from the range + */ + override Token moveFront() + { + auto r = front(); + popFront(); + return r; + } + + override int opApply(int delegate(Token) dg) + { + int result = 0; + while (!empty) + { + result = dg(front); + if (result) + break; + popFront(); + } + return result; + } + + override int opApply(int delegate(size_t, Token) dg) + { + int result = 0; + int i = 0; + while (!empty) + { + result = dg(i, front); + if (result) + break; + popFront(); + } + return result; + } + + /** + * Removes the current token from the range + */ + override void popFront() + { + if (range.empty) + { + _empty = true; + return; + } + + current = Token.init; + current.lineNumber = lineNumber; + current.startIndex = index; + + while (std.uni.isWhite(range.front)) + { + if (iterStyle == IterationStyle.Everything) + { + current = lexWhitespace(range, index, lineNumber); + return; + } + else + lexWhitespace(range, index, lineNumber); + } + outer: switch (range.front) + { + 
mixin(generateCaseTrie( + "=", "TokenType.Assign", + "&", "TokenType.BitAnd", + "&=", "TokenType.BitAndEquals", + "|", "TokenType.BitOr", + "|=", "TokenType.BitOrEquals", + "~=", "TokenType.CatEquals", + ":", "TokenType.Colon", + ",", "TokenType.Comma", + "$", "TokenType.Dollar", + ".", "TokenType.Dot", + "==", "TokenType.Equals", + "=>", "TokenType.GoesTo", + ">", "TokenType.Greater", + ">=", "TokenType.GreaterEqual", + "#", "TokenType.Hash", + "&&", "TokenType.LogicAnd", + "{", "TokenType.LBrace", + "[", "TokenType.LBracket", + "<", "TokenType.Less", + "<=", "TokenType.LessEqual", + "<>=", "TokenType.LessEqualGreater", + "<>", "TokenType.LessOrGreater", + "||", "TokenType.LogicOr", + "(", "TokenType.LParen", + "-", "TokenType.Minus", + "-=", "TokenType.MinusEquals", + "%", "TokenType.Mod", + "%=", "TokenType.ModEquals", + "*=", "TokenType.MulEquals", + "!", "TokenType.Not", + "!=", "TokenType.NotEquals", + "!>", "TokenType.NotGreater", + "!>=", "TokenType.NotGreaterEqual", + "!<", "TokenType.NotLess", + "!<=", "TokenType.NotLessEqual", + "!<>", "TokenType.NotLessEqualGreater", + "+", "TokenType.Plus", + "+=", "TokenType.PlusEquals", + "^^", "TokenType.Pow", + "^^=", "TokenType.PowEquals", + "}", "TokenType.RBrace", + "]", "TokenType.RBracket", + ")", "TokenType.RParen", + ";", "TokenType.Semicolon", + "<<", "TokenType.ShiftLeft", + "<<=", "TokenType.ShiftLeftEqual", + ">>", "TokenType.ShiftRight", + ">>=", "TokenType.ShiftRightEqual", + "..", "TokenType.Slice", + "*", "TokenType.Star", + "?", "TokenType.Ternary", + "~", "TokenType.Tilde", + "--", "TokenType.Decrement", + "!<>=", "TokenType.Unordered", + ">>>", "TokenType.UnsignedShiftRight", + ">>>=", "TokenType.UnsignedShiftRightEqual", + "++", "TokenType.Increment", + "...", "TokenType.Vararg", + "^", "TokenType.Xor", + "^=", "TokenType.XorEquals", + "@", "TokenType.At", + )); + case '0': .. 
case '9': + current = lexNumber(range, index, lineNumber); + break; + case '\'': + case '"': + current = lexString(range, index, lineNumber, stringStyle); + break; + case '`': + current = lexString(range, index, lineNumber, stringStyle); + break; + case 'q': + auto r = range.save; + r.popFront(); + if (!r.isEoF() && r.front == '{') + { + current = lexTokenString(range, index, lineNumber, stringStyle); + break; + } + else if (!r.isEoF() && r.front == '"') + { + current = lexDelimitedString(range, index, lineNumber, + stringStyle); + break; + } + else + goto default; + case '/': + auto r = range.save(); + r.popFront(); + if (r.isEoF()) + { + current.type = TokenType.Div; + current.value = "/"; + range.popFront(); + ++index; + break; + } + switch (r.front) + { + case '/': + case '*': + case '+': + current = lexComment(range, index, lineNumber); + break outer; + case '=': + current.type = TokenType.DivEquals; + current.value = "/="; + range.popFront(); + range.popFront(); + index += 2; + break outer; + default: + current.type = TokenType.Div; + current.value = "/"; + ++index; + range.popFront(); + break outer; + } + case 'r': + auto r = range.save(); + r.popFront(); + if (!r.isEoF() && r.front == '"') + { + current = lexString(range, index, lineNumber, stringStyle); + break; + } + else + goto default; + case 'x': + auto r = range.save(); + r.popFront(); + if (!r.isEoF() && r.front == '"') + { + current = lexHexString(range, index, lineNumber); + break; + } + else + goto default; + default: + auto app = appender!(ElementType!(R)[])(); + while(!range.isEoF() && !isSeparating(range.front)) + { + app.put(range.front); + range.popFront(); + ++index; + } + current.value = to!string(app.data); + current.type = lookupTokenType(current.value); + break; + } + } + +private: + Token current; + uint lineNumber; + uint index; + R range; + bool _empty; + IterationStyle iterStyle; + StringStyle stringStyle; +} + +/** + * Listing of all the tokens in the D language. 
+ * + * Token types are arranged so that it is easy to group tokens while iterating + * over them. For example: + * --- + * assert(TokenType.Increment < TokenType.OPERATORS_END); + * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN); + * --- + * The non-token values are documented below: + * + * $(BOOKTABLE , + * $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples)) + * $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operatiors) $(TD +, -, <<=)) + * $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double)) + * $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS) $(TD keywords) $(TD class, if, assert)) + * $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared)) + * $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD protection) $(TD public, protected)) + * $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__)) + * $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123)) + * $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110)) + * $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde")) + * $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers)) + * ) + * Note that several of the above ranges overlap. + */ +enum TokenType: uint +{ + // Operators + OPERATORS_BEGIN, /// + Assign, /// = + At, /// @ + BitAnd, /// & + BitAndEquals, /// &= + BitOr, /// | + BitOrEquals, /// |= + CatEquals, /// ~= + Colon, /// : + Comma, /// , + Decrement, /// -- + Div, /// / + DivEquals, /// /= + Dollar, /// $ + Dot, /// . 
+ Equals, /// == + GoesTo, // => + Greater, /// > + GreaterEqual, /// >= + Hash, // # + Increment, /// ++ + LBrace, /// { + LBracket, /// [ + Less, /// < + LessEqual, /// <= + LessEqualGreater, // <>= + LessOrGreater, /// <> + LogicAnd, /// && + LogicOr, /// || + LParen, /// $(LPAREN) + Minus, /// - + MinusEquals, /// -= + Mod, /// % + ModEquals, /// %= + MulEquals, /// *= + Not, /// ! + NotEquals, /// != + NotGreater, /// !> + NotGreaterEqual, /// !>= + NotLess, /// !< + NotLessEqual, /// !<= + NotLessEqualGreater, /// !<> + Plus, /// + + PlusEquals, /// += + Pow, /// ^^ + PowEquals, /// ^^= + RBrace, /// } + RBracket, /// ] + RParen, /// $(RPAREN) + Semicolon, /// ; + ShiftLeft, /// << + ShiftLeftEqual, /// <<= + ShiftRight, /// >> + ShiftRightEqual, /// >>= + Slice, // .. + Star, /// * + Ternary, /// ? + Tilde, /// ~ + Unordered, /// !<>= + UnsignedShiftRight, /// >>> + UnsignedShiftRightEqual, /// >>>= + Vararg, /// ... + Xor, /// ^ + XorEquals, /// ^= + OPERATORS_END, /// + + // Types + TYPES_BEGIN, /// + Bool, /// bool, + Byte, /// byte, + Cdouble, /// cdouble, + Cent, /// cent, + Cfloat, /// cfloat, + Char, /// char, + Creal, /// creal, + Dchar, /// dchar, + Double, /// double, + DString, /// dstring + Float, /// float, + Function, /// function, + Idouble, /// idouble, + Ifloat, /// ifloat, + Int, /// int, + Ireal, /// ireal, + Long, /// long, + Real, /// real, + Short, /// short, + String, /// string + Ubyte, /// ubyte, + Ucent, /// ucent, + Uint, /// uint, + Ulong, /// ulong, + Ushort, /// ushort, + Void, /// void, + Wchar, /// wchar, + WString, /// wstring + TYPES_END, /// + + Template, /// template, + + // Keywords + KEYWORDS_BEGIN, /// + ATTRIBUTES_BEGIN, /// + Align, /// align, + Deprecated, /// deprecated, + Extern, /// extern, + Pragma, /// pragma, + PROTECTION_BEGIN, /// + Export, /// export, + Package, /// package, + Private, /// private, + Protected, /// protected, + Public, /// public, + PROTECTION_END, /// + Abstract, /// abstract, + AtDisable, 
/// @disable + Auto, /// auto, + Const, /// const, + Final, /// final + Gshared, /// __gshared, + Immutable, // immutable, + Inout, // inout, + Scope, /// scope, + Shared, // shared, + Static, /// static, + Synchronized, /// synchronized, + ATTRIBUTES_END, /// + Alias, /// alias, + Asm, /// asm, + Assert, /// assert, + Body, /// body, + Break, /// break, + Case, /// case, + Cast, /// cast, + Catch, /// catch, + Class, /// class, + Continue, /// continue, + Debug, /// debug, + Default, /// default, + Delegate, /// delegate, + Delete, /// delete, + Do, /// do, + Else, /// else, + Enum, /// enum, + False, /// false, + Finally, /// finally, + Foreach, /// foreach, + Foreach_reverse, /// foreach_reverse, + For, /// for, + Goto, /// goto, + If, /// if , + Import, /// import, + In, /// in, + Interface, /// interface, + Invariant, /// invariant, + Is, /// is, + Lazy, /// lazy, + Macro, /// macro, + Mixin, /// mixin, + Module, /// module, + New, /// new, + Nothrow, /// nothrow, + Null, /// null, + Out, /// out, + Override, /// override, + Pure, /// pure, + Ref, /// ref, + Return, /// return, + Struct, /// struct, + Super, /// super, + Switch, /// switch , + This, /// this, + Throw, /// throw, + True, /// true, + Try, /// try, + Typedef, /// typedef, + Typeid, /// typeid, + Typeof, /// typeof, + Union, /// union, + Unittest, /// unittest, + Version, /// version, + Volatile, /// volatile, + While, /// while , + With, /// with, + KEYWORDS_END, /// + + // Constants + CONSTANTS_BEGIN, + File, /// __FILE__, + Line, /// __LINE__, + Thread, /// __thread, + Traits, /// __traits, + CONSTANTS_END, /// + + // Misc + MISC_BEGIN, /// + Comment, /// /** comment */ or // comment or ///comment + Identifier, /// anything else + ScriptLine, // Line at the beginning of source file that starts from #! 
+ Whitespace, /// whitespace + MISC_END, /// + + // Literals + LITERALS_BEGIN, /// + NUMBERS_BEGIN, /// + DoubleLiteral, /// 123.456 + FloatLiteral, /// 123.456f or 0x123_45p-af + IDoubleLiteral, /// 123.456i + IFloatLiteral, /// 123.456fi + IntLiteral, /// 123 or 0b1101010101 + LongLiteral, /// 123L + RealLiteral, /// 123.456L + IRealLiteral, /// 123.456Li + UnsignedIntLiteral, /// 123u + UnsignedLongLiteral, /// 123uL + NUMBERS_END, /// + STRINGS_BEGIN, /// + DStringLiteral, /// "32-bit character string"d + StringLiteral, /// "a string" + WStringLiteral, /// "16-bit character string"w + STRINGS_END, /// + LITERALS_END, /// +} + +// Implementation details follow +private: + +private pure bool isNewline(R)(R range) { return range.front == '\n' || range.front == '\r'; } @@ -30,7 +662,8 @@ pure bool isEoF(R)(R range) return range.empty || range.front == 0 || range.front == 0x1a; } -C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index) if (isSomeChar!C && isForwardRange!R) +C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index) + if (isSomeChar!C && isForwardRange!R) { C[] chars; if (range.front == '\r') @@ -56,11 +689,8 @@ unittest assert (s == "test"); } -/** - * Returns: - */ -Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index, ref uint lineNumber) - if (isForwardRange!R && isSomeChar!C) +Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index, + ref uint lineNumber) if (isForwardRange!R && isSomeChar!C) { Token t; t.type = TokenType.Whitespace; @@ -97,15 +727,6 @@ unittest assert (lineNum == 3); } -/** - * Increments endIndex until it indexes a character directly after a comment - * Params: - * inputString = the source code to examine - * endIndex = an index into inputString at the second character of a - * comment, i.e. points at the second slash in a // comment. 
- * lineNumber = the line number that corresponds to endIndex - * Returns: The comment - */ Token lexComment(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber) if (isSomeChar!C && isForwardRange!R) in @@ -252,9 +873,6 @@ unittest assert (comment == ""); } -/** - * Pops up to upTo hex chars from the input range and returns them as a string - */ string popDigitChars(R, C = ElementType!R, alias isInterestingDigit)(ref R input, ref uint index, uint upTo) if (isSomeChar!C && isForwardRange!R) { @@ -628,6 +1246,250 @@ unittest assert (lexString(g, i, l) == "a\nb"); } +Token lexDelimitedString(R)(ref R input, ref uint index, + ref uint lineNumber, const StringStyle stringStyle = StringStyle.Default) +in +{ + assert(input.front == 'q'); +} +body +{ + auto app = appender!(ElementType!R[])(); + Token t; + t.startIndex = index; + t.lineNumber = lineNumber; + t.type = TokenType.StringLiteral; + + input.popFront(); // q + input.popFront(); // " + index += 2; + if (stringStyle & StringStyle.IncludeQuotes) + { + app.put('q'); + app.put('"'); + } + + bool heredoc; + ElementType!R open; + ElementType!R close; + + switch (input.front) + { + case '[': open = '['; close = ']'; break; + case '{': open = '{'; close = '}'; break; + case '(': open = '('; close = ')'; break; + case '<': open = '<'; close = '>'; break; + default: heredoc = true; break; + } + + if (heredoc) + { + auto hereOpen = appender!(ElementType!(R)[])(); + while (!input.isEoF() && !std.uni.isWhite(input.front)) + { + hereOpen.put(input.front()); + input.popFront(); + } + if (input.isNewline()) + { + ++lineNumber; + input.popNewline(index); + } +// else +// this is an error + while (!input.isEoF()) + { + if (isNewline(input)) + { + ++lineNumber; + app.put(input.popNewline(index)); + } + else if (input.front == '"' && app.data.endsWith(hereOpen.data)) + { + app.put('"'); + ++index; + input.popFront(); + if (stringStyle & StringStyle.IncludeQuotes) + t.value = to!string(app.data); + else + t.value = 
to!string(app.data[0 .. app.data.length - hereOpen.data.length - 1]); + break; + } + else + { + app.put(input.front); + ++index; + input.popFront(); + } + } + } + else + { + if (stringStyle & StringStyle.IncludeQuotes) + app.put(input.front); + input.popFront(); + int depth = 1; + while (depth > 0 && !input.isEoF()) + { + if (isNewline(input)) + app.put(popNewline(input, index)); + else + { + if (input.front == close) + { + --depth; + if (depth == 0) + { + if (stringStyle & StringStyle.IncludeQuotes) + { + app.put(close); + app.put('"'); + } + input.popFront(); + input.popFront(); + break; + } + } + else if (input.front == open) + ++depth; + app.put(input.front); + input.popFront(); + ++index; + } + } + } + if (!input.isEoF()) + { + switch (input.front) + { + case 'w': + t.type = TokenType.WStringLiteral; + goto case 'c'; + case 'd': + t.type = TokenType.DStringLiteral; + goto case 'c'; + case 'c': + if (stringStyle & StringStyle.IncludeQuotes) + app.put(input.front); + input.popFront(); + ++index; + break; + default: + break; + } + } + if (t.value is null) + t.value = to!string(app.data); + return t; +} + +unittest +{ + uint i; + uint l; + auto a = `q"{abc{}de}"`; + auto ar = lexDelimitedString(a, i, l); + assert (ar == "abc{}de"); + assert (ar == TokenType.StringLiteral); + + auto b = "q\"abcde\n123\nabcde\"w"; + auto br = lexDelimitedString(b, i, l); + assert (br == "123\n"); + assert (br == TokenType.WStringLiteral); + + auto c = `q"[]");`; + auto cr = lexDelimitedString(c, i, l, StringStyle.Source); + assert (cr == `q"[ ]"`); + assert (cr == TokenType.StringLiteral); +} + +Token lexTokenString(R)(ref R input, ref uint index, ref uint lineNumber, + const StringStyle stringStyle = StringStyle.Default) +in +{ + assert (input.front == 'q'); +} +body +{ + Token t; + t.startIndex = index; + t.type = TokenType.StringLiteral; + t.lineNumber = lineNumber; + auto app = appender!(ElementType!(R)[])(); + input.popFront(); // q + input.popFront(); // { + index += 2; + if 
(stringStyle & StringStyle.IncludeQuotes) + { + app.put('q'); + app.put('{'); + } + auto r = byToken(input, IterationStyle.Everything, StringStyle.Source); + r.index = index; + int depth = 1; + while (!r.empty) + { + if (r.front == TokenType.LBrace) + { + ++depth; + } + else if (r.front == TokenType.RBrace) + { + --depth; + if (depth <= 0) + { + if (stringStyle & StringStyle.IncludeQuotes) + app.put('}'); + r.popFront(); + break; + } + } + app.put(r.front.value); + r.popFront(); + } + + auto n = app.data.length - (stringStyle & StringStyle.IncludeQuotes ? 2 : 0); + input.popFrontN(n); + if (!input.isEoF()) + { + switch (input.front) + { + case 'w': + t.type = TokenType.WStringLiteral; + goto case 'c'; + case 'd': + t.type = TokenType.DStringLiteral; + goto case 'c'; + case 'c': + if (stringStyle & StringStyle.IncludeQuotes) + app.put(input.front); + input.popFront(); + ++index; + break; + default: + break; + } + } + t.value = to!string(app.data); + index = r.index; + return t; +} + +unittest +{ + uint i; + uint l; + auto a = "q{import std.stdio;}"; + auto ar = lexTokenString(a, i, l); + assert (ar == TokenType.StringLiteral); + assert (ar == "import std.stdio;"); + + auto b = `q{writeln("hello world");}`; + auto br = lexTokenString(b, i, l, StringStyle.Source); + assert (br == TokenType.StringLiteral); + assert (br == `q{writeln("hello world");}`); +} + Token lexNumber(R)(ref R input, ref uint index, const uint lineNumber) in { @@ -635,7 +1497,7 @@ in } body { - auto app = appender!(char[])(); + auto app = appender!(ElementType!(R)[])(); // hex and binary can start with zero, anything else is decimal if (input.front != '0') return lexDecimal(input, index, lineNumber, app); @@ -672,8 +1534,8 @@ unittest assert (lexNumber(a, i, l) == "0"); } -Token lexBinary(R)(ref R input, ref uint index, const uint lineNumber, - ref typeof(appender!(char[])()) app) +Token lexBinary(R, A)(ref R input, ref uint index, const uint lineNumber, + ref A app) { Token token; 
token.lineNumber = lineNumber; @@ -777,8 +1639,8 @@ unittest } -Token lexDecimal(R)(ref R input, ref uint index, const uint lineNumber, - ref typeof(appender!(char[])()) app) +Token lexDecimal(R, A)(ref R input, ref uint index, const uint lineNumber, + ref A app) { bool lexingSuffix = false; bool isLong = false; @@ -940,7 +1802,8 @@ Token lexDecimal(R)(ref R input, ref uint index, const uint lineNumber, } -unittest { +unittest +{ uint i; uint l; auto a = "55e-4"; @@ -1044,8 +1907,8 @@ unittest { assert (xr == TokenType.DoubleLiteral); } -Token lexHex(R)(ref R input, ref uint index, const uint lineNumber, - ref typeof(appender!(char[])()) app) +Token lexHex(R, A)(ref R input, ref uint index, const uint lineNumber, + ref A app) { bool isLong = false; bool isUnsigned = false; @@ -1208,10 +2071,6 @@ unittest assert (pr == TokenType.DoubleLiteral); } -/** - * Returns: true if ch marks the ending of one token and the beginning of - * another, false otherwise - */ pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C) { switch (ch) @@ -1230,260 +2089,278 @@ pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C) } } -/** - * Configure the tokenize() function - */ -enum IterationStyle +pure nothrow TokenType lookupTokenType(const string input) { - /// Only include code, not whitespace or comments - CodeOnly = 0, - /// Includes comments - IncludeComments = 0b01, - /// Includes whitespace - IncludeWhitespace = 0b10, - /// Include everything - Everything = IncludeComments | IncludeWhitespace -} - -/** - * Configuration of the token lexing style - */ -enum StringStyle : uint -{ - /// Escape sequences will be replaced with their equivalent characters. 
- /// Quote characters will not be included - Default = 0b0000, - - /// Escape sequences will not be processed - NotEscaped = 0b0001, - - /// Strings will include their opening and closing quote characters as well - /// as any prefixes or suffixes (e.g.: "abcde"w will include the 'w' - /// character) - IncludeQuotes = 0x0010, - - /// Strings will be read exactly as they appeared in the source, including - /// their opening and closing quote characters. Useful for syntax highlighting. - Source = NotEscaped | IncludeQuotes, -} - -TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly, - const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R))) -{ - auto r = TokenRange!(R)(range); - r.stringStyle = stringStyle; - r.iterStyle = iterationStyle; - r.lineNumber = 1; - r.popFront(); - return r; -} - -struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R))) -{ - this(ref R range) + switch(input.length) { - this.range = range; - } - - bool empty() @property - { - return _empty; - } - - Token front() const @property - { - enforce(!_empty, "Cannot call popFront() on empty token range"); - return current; - } - - Token popFront() - { - if (range.isEoF()) + case 2: + switch (input) { - _empty = true; - return current; + case "do": return TokenType.Do; + case "if": return TokenType.If; + case "in": return TokenType.In; + case "is": return TokenType.Is; + default: break; } - - Token c = current; - current = Token.init; - current.lineNumber = lineNumber; - current.startIndex = index; - - while (std.uni.isWhite(range.front)) + break; + case 3: + switch (input) { - if (iterStyle == IterationStyle.Everything) + case "asm": return TokenType.Asm; + case "for": return TokenType.For; + case "int": return TokenType.Int; + case "new": return TokenType.New; + case "out": return TokenType.Out; + case "ref": return TokenType.Ref; + case "try": return TokenType.Try; + default: break; 
+ } + break; + case 4: + switch (input) + { + case "auto": return TokenType.Auto; + case "body": return TokenType.Body; + case "bool": return TokenType.Bool; + case "byte": return TokenType.Byte; + case "case": return TokenType.Case; + case "cast": return TokenType.Cast; + case "cent": return TokenType.Cent; + case "char": return TokenType.Char; + case "else": return TokenType.Else; + case "enum": return TokenType.Enum; + case "goto": return TokenType.Goto; + case "lazy": return TokenType.Lazy; + case "long": return TokenType.Long; + case "null": return TokenType.Null; + case "pure": return TokenType.Pure; + case "real": return TokenType.Real; + case "this": return TokenType.This; + case "true": return TokenType.True; + case "uint": return TokenType.Uint; + case "void": return TokenType.Void; + case "with": return TokenType.With; + default: break; + } + break; + case 5: + switch (input) + { + case "alias": return TokenType.Alias; + case "align": return TokenType.Align; + case "break": return TokenType.Break; + case "catch": return TokenType.Catch; + case "class": return TokenType.Class; + case "const": return TokenType.Const; + case "creal": return TokenType.Creal; + case "dchar": return TokenType.Dchar; + case "debug": return TokenType.Debug; + case "false": return TokenType.False; + case "final": return TokenType.Final; + case "float": return TokenType.Float; + case "inout": return TokenType.Inout; + case "ireal": return TokenType.Ireal; + case "macro": return TokenType.Macro; + case "mixin": return TokenType.Mixin; + case "scope": return TokenType.Scope; + case "short": return TokenType.Short; + case "super": return TokenType.Super; + case "throw": return TokenType.Throw; + case "ubyte": return TokenType.Ubyte; + case "ucent": return TokenType.Ucent; + case "ulong": return TokenType.Ulong; + case "union": return TokenType.Union; + case "wchar": return TokenType.Wchar; + case "while": return TokenType.While; + default: break; + } + break; + case 6: + switch 
(input) + { + case "assert": return TokenType.Assert; + case "cfloat": return TokenType.Cfloat; + case "delete": return TokenType.Delete; + case "double": return TokenType.Double; + case "export": return TokenType.Export; + case "extern": return TokenType.Extern; + case "ifloat": return TokenType.Ifloat; + case "import": return TokenType.Import; + case "module": return TokenType.Module; + case "pragma": return TokenType.Pragma; + case "public": return TokenType.Public; + case "return": return TokenType.Return; + case "shared": return TokenType.Shared; + case "static": return TokenType.Static; + case "string": return TokenType.String; + case "struct": return TokenType.Struct; + case "switch": return TokenType.Switch; + case "typeid": return TokenType.Typeid; + case "typeof": return TokenType.Typeof; + case "ushort": return TokenType.Ushort; + default: break; + } + break; + case 7: + switch (input) + { + case "cdouble": return TokenType.Cdouble; + case "default": return TokenType.Default; + case "dstring": return TokenType.DString; + case "finally": return TokenType.Finally; + case "foreach": return TokenType.Foreach; + case "idouble": return TokenType.Idouble; + case "nothrow": return TokenType.Nothrow; + case "package": return TokenType.Package; + case "private": return TokenType.Private; + case "typedef": return TokenType.Typedef; + case "version": return TokenType.Version; + case "wstring": return TokenType.WString; + default: break; + } + break; + case 8: + switch (input) + { + case "override": return TokenType.Override; + case "continue": return TokenType.Continue; + case "__LINE__": return TokenType.Line; + case "template": return TokenType.Template; + case "abstract": return TokenType.Abstract; + case "__thread": return TokenType.Thread; + case "__traits": return TokenType.Traits; + case "volatile": return TokenType.Volatile; + case "delegate": return TokenType.Delegate; + case "function": return TokenType.Function; + case "unittest": return 
TokenType.Unittest; + case "__FILE__": return TokenType.File; + default: break; + } + break; + case 9: + switch (input) + { + case "__gshared": return TokenType.Gshared; + case "immutable": return TokenType.Immutable; + case "interface": return TokenType.Interface; + case "invariant": return TokenType.Invariant; + case "protected": return TokenType.Protected; + default: break; + } + break; + case 10: + if (input == "deprecated") + return TokenType.Deprecated; + break; + case 12: + if (input == "synchronized") + return TokenType.Synchronized; + break; + case 15: + if (input == "foreach_reverse") + return TokenType.Foreach_reverse; + break; + default: break; + } + return TokenType.Identifier; +} + +class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) +{ + /** + * Adds the given value to the trie with the given key + */ + void add(K key, V value) pure + { + TrieNode!(K,V) current = this; + foreach(keyPart; key) + { + if ((keyPart in current.children) is null) { - current = lexWhitespace(range, index, lineNumber); - return c; + auto node = new TrieNode!(K, V); + current.children[keyPart] = node; + current = node; } else - lexWhitespace(range, index, lineNumber); + current = current.children[keyPart]; } - outer: switch (range.front) - { - mixin(generateCaseTrie( - "=", "TokenType.Assign", - "&", "TokenType.BitAnd", - "&=", "TokenType.BitAndEquals", - "|", "TokenType.BitOr", - "|=", "TokenType.BitOrEquals", - "~=", "TokenType.CatEquals", - ":", "TokenType.Colon", - ",", "TokenType.Comma", - "$", "TokenType.Dollar", - ".", "TokenType.Dot", - "==", "TokenType.Equals", - "=>", "TokenType.GoesTo", - ">", "TokenType.Greater", - ">=", "TokenType.GreaterEqual", - "#", "TokenType.Hash", - "&&", "TokenType.LogicAnd", - "{", "TokenType.LBrace", - "[", "TokenType.LBracket", - "<", "TokenType.Less", - "<=", "TokenType.LessEqual", - "<>=", "TokenType.LessEqualGreater", - "<>", "TokenType.LessOrGreater", - "||", "TokenType.LogicOr", - "(", "TokenType.LParen", - "-", 
"TokenType.Minus", - "-=", "TokenType.MinusEquals", - "%", "TokenType.Mod", - "%=", "TokenType.ModEquals", - "*=", "TokenType.MulEquals", - "!", "TokenType.Not", - "!=", "TokenType.NotEquals", - "!>", "TokenType.NotGreater", - "!>=", "TokenType.NotGreaterEqual", - "!<", "TokenType.NotLess", - "!<=", "TokenType.NotLessEqual", - "!<>", "TokenType.NotLessEqualGreater", - "+", "TokenType.Plus", - "+=", "TokenType.PlusEquals", - "^^", "TokenType.Pow", - "^^=", "TokenType.PowEquals", - "}", "TokenType.RBrace", - "]", "TokenType.RBracket", - ")", "TokenType.RParen", - ";", "TokenType.Semicolon", - "<<", "TokenType.ShiftLeft", - "<<=", "TokenType.ShiftLeftEqual", - ">>", "TokenType.ShiftRight", - ">>=", "TokenType.ShiftRightEqual", - "..", "TokenType.Slice", - "*", "TokenType.Star", - "?", "TokenType.Ternary", - "~", "TokenType.Tilde", - "--", "TokenType.Decrement", - "!<>=", "TokenType.Unordered", - ">>>", "TokenType.UnsignedShiftRight", - ">>>=", "TokenType.UnsignedShiftRightEqual", - "++", "TokenType.Increment", - "...", "TokenType.Vararg", - "^", "TokenType.Xor", - "^=", "TokenType.XorEquals", - "@", "TokenType.At", - )); - case '0': .. 
case '9': - current = lexNumber(range, index, lineNumber); - break; - case '\'': - case '"': - current = lexString(range, index, lineNumber, stringStyle); - break; - case '`': - current = lexString(range, index, lineNumber, stringStyle); - break; - case 'q': - /+auto r = range.save; - r.popFront(); - if (!r.isEoF() && r.front == '{') - { - writeln("ParseTokenString"); - break; - } - else+/ - goto default; - case '/': - auto r = range.save(); - r.popFront(); - if (r.isEoF()) - { - current.type = TokenType.Div; - current.value = "/"; - range.popFront(); - ++index; - break; - } - switch (r.front) - { - case '/': - case '*': - case '+': - current = lexComment(range, index, lineNumber); - break outer; - case '=': - current.type = TokenType.DivEquals; - current.value = "/="; - range.popFront(); - range.popFront(); - index += 2; - break outer; - default: - current.type = TokenType.Div; - current.value = "/"; - ++index; - range.popFront(); - break outer; - } - case 'r': - auto r = range.save(); - r.popFront(); - if (!r.isEoF() && r.front == '"') - { - current = lexString(range, index, lineNumber, stringStyle); - break; - } - else - goto default; - case 'x': - auto r = range.save(); - r.popFront(); - if (!r.isEoF() && r.front == '"') - { - current = lexHexString(range, index, lineNumber); - break; - } - else - goto default; - default: - auto app = appender!(ElementType!(R)[])(); - while(!range.isEoF() && !isSeparating(range.front)) - { - app.put(range.front); - range.popFront(); - ++index; - } - current.value = to!string(app.data); - current.type = lookupTokenTypeOptimized(current.value); - break; - } - return c; + current.value = value; } - -private: - Token current; - uint lineNumber; - uint index; - R range; - bool _empty; - IterationStyle iterStyle; - StringStyle stringStyle; } -unittest +class TrieNode(K, V) if (isInputRange!K) { - auto c = `r"d:\path\foo.bat"`; - foreach (t; byToken(c, IterationStyle.CodeOnly, StringStyle.Source)) - writeln(t.type, ": {", t.value, 
"}"); + V value; + TrieNode!(K,V)[ElementType!K] children; +} + +string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString) +{ + string caseStatement = ""; + foreach(dchar k, TrieNode!(K,V) v; node.children) + { + caseStatement ~= indentString; + caseStatement ~= "case '"; + caseStatement ~= k; + caseStatement ~= "':\n"; + caseStatement ~= indentString; + caseStatement ~= "\tcurrent.value ~= '"; + caseStatement ~= k; + caseStatement ~= "';\n"; + caseStatement ~= indentString; + caseStatement ~= "\t++index;\n"; + caseStatement ~= indentString; + caseStatement ~= "\trange.popFront();\n"; + if (v.children.length > 0) + { + caseStatement ~= indentString; + caseStatement ~= "\tif (range.isEoF())\n"; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tbreak;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + caseStatement ~= indentString; + caseStatement ~= "\tswitch (range.front)\n"; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= printCaseStatements(v, indentString ~ "\t"); + caseStatement ~= indentString; + caseStatement ~= "\tdefault:\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tbreak;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + caseStatement ~= indentString; + caseStatement ~= "\tbreak;\n"; + } + else + { + caseStatement ~= indentString; + caseStatement ~= "\tcurrent.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\tbreak;\n"; + } + } + return caseStatement; +} + +string generateCaseTrie(string[] args ...) 
+{ + auto t = new Trie!(string, string); + for(int i = 0; i < args.length; i+=2) + { + t.add(args[i], args[i+1]); + } + return printCaseStatements(t, ""); }