diff --git a/.gitmodules b/.gitmodules
old mode 100755
new mode 100644
index e69de29..c34a4b6
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "datapicked"]
+ path = datapicked
+ url = ./datapicked/
diff --git a/astprinter.d b/astprinter.d
index ec3921a..ae2e855 100644
--- a/astprinter.d
+++ b/astprinter.d
@@ -469,9 +469,9 @@ class XMLPrinter : ASTVisitor
output.writeln("
]"); - foreach (Token t; tokens) + while (!tokens.empty) { + auto t = tokens.front; + tokens.popFront(); if (isBasicType(t.type)) writeSpan("type", str(t.type)); else if (isKeyword(t.type)) diff --git a/main.d b/main.d index 33cfdf6..def1c6d 100644 --- a/main.d +++ b/main.d @@ -17,13 +17,14 @@ import std.stdio; import std.range; import stdx.d.lexer; import stdx.d.parser; +import dpick.buffer.buffer; import highlighter; -import stats; -import ctags; -import astprinter; -import imports; -import outliner; +//import stats; +//import ctags; +//import astprinter; +//import imports; +//import outliner; int main(string[] args) { @@ -91,69 +92,69 @@ int main(string[] args) { bool usingStdin = args.length == 1; ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]); - highlighter.highlight(byToken!(typeof(bytes), false, false)(bytes), - args.length == 1 ? "stdin" : args[1]); + auto tokens = DLexer!(ubyte[])(bytes); + highlighter.highlight(tokens, args.length == 1 ? "stdin" : args[1]); return 0; } - else if (ctags) - { - stdout.printCtags(expandArgs(args, recursive)); - } - else - { - bool usingStdin = args.length == 1; - if (sloc || tokenCount) - { - if (usingStdin) - { - auto tokens = byToken!(ubyte[], false, false)(readStdin()); - if (tokenCount) - printTokenCount(stdout, "stdin", tokens); - else - printLineCount(stdout, "stdin", tokens); - } - else - { - ulong count; - foreach (f; expandArgs(args, recursive)) - { - auto tokens = byToken!(ubyte[])(readFile(f)); - if (tokenCount) - count += printTokenCount(stdout, f, tokens); - else - count += printLineCount(stdout, f, tokens); - } - writefln("total:\t%d", count); - } - } - else if (syntaxCheck) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - } - else if (imports) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto visitor = new ImportPrinter; - visitor.visit(mod); - } - else if (ast) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto printer = new XMLPrinter; - printer.output = stdout; - printer.visit(mod); - } - else if (outline) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto outliner = new Outliner(stdout); - outliner.visit(mod); - } - } +// else if (ctags) +// { +// stdout.printCtags(expandArgs(args, recursive)); +// } +// else +// { +// bool usingStdin = args.length == 1; +// if (sloc || tokenCount) +// { +// if (usingStdin) +// { +// auto tokens = byToken!(ubyte[], false, false)(readStdin()); +// if (tokenCount) +// printTokenCount(stdout, "stdin", tokens); +// else +// printLineCount(stdout, "stdin", tokens); +// } +// else +// { +// ulong count; +// foreach (f; expandArgs(args, recursive)) +// { +// auto tokens = byToken!(ubyte[])(readFile(f)); +// if (tokenCount) +// count += printTokenCount(stdout, f, tokens); +// else +// count += printLineCount(stdout, f, tokens); +// } +// writefln("total:\t%d", count); +// } +// } +// else if (syntaxCheck) +// { +// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); +// parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); +// } +// else if (imports) +// { +// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); +// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); +// auto visitor = new ImportPrinter; +// visitor.visit(mod); +// } +// else if (ast) +// { +// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); +// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); +// auto printer = new XMLPrinter; +// printer.output = stdout; +// printer.visit(mod); +// } +// else if (outline) +// { +// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); +// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); +// auto outliner = new Outliner(stdout); +// outliner.visit(mod); +// } +// } return 0; } diff --git a/main.html b/main.html deleted file mode 100644 index 6f0976f..0000000 --- a/main.html +++ /dev/null @@ -1,276 +0,0 @@ - - - - - -main.d - - - --// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module main; - -import std.algorithm; -import std.array; -import std.conv; -import std.file; -import std.getopt; -import std.parallelism; -import std.path; -import std.regex; -import std.stdio; -import std.range; -import stdx.d.lexer; -import stdx.d.parser; - -import highlighter; -import stats; -import ctags; -import astprinter; -import imports; -import outliner; - -int main(string[] args) -{ - bool sloc; - bool highlight; - bool ctags; - bool recursive; - bool format; - bool help; - bool tokenCount; - bool syntaxCheck; - bool ast; - bool imports; - bool muffin; - bool outline; - - try - { - getopt(args, "sloc|l", &sloc, "highlight", &highlight, - "ctags|c", &ctags, "recursive|r|R", &recursive, "help|h", &help, - "tokenCount|t", &tokenCount, "syntaxCheck|s", &syntaxCheck, - "ast|xml", &ast, "imports|i", &imports, "outline|o", &outline, - "muffinButton", &muffin); - } - catch (Exception e) - { - stderr.writeln(e.msg); - } - - if (muffin) - { - stdout.writeln( -` ___________ - __(#*O 0** @%*)__ - _(%*o#*O%*0 #O#%##@)_ - (*#@%#o*@ #o%O*%@ #o #) - \=====================/ - |I|I|I|I|I|I|I|I|I|I| - |I|I|I|I|I|I|I|I|I|I| - |I|I|I|I|I|I|I|I|I|I| - |I|I|I|I|I|I|I|I|I|I|`); - return 0; - } - - if (help) - { - printHelp(args[0]); - return 0; - } - - auto optionCount = count!"a"([sloc, highlight, ctags, tokenCount, - syntaxCheck, ast, imports, outline]); - if (optionCount > 1) - { - stderr.writeln("Too many options specified"); - return 1; - } - else if (optionCount < 1) - { - printHelp(args[0]); - return 1; - } - - if (highlight) - { - bool usingStdin = args.length == 1; - ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]); - highlighter.highlight(byToken!(typeof(bytes), false, false)(bytes), - args.length == 1 ? "stdin" : args[1]); - return 0; - } - else if (ctags) - { - stdout.printCtags(expandArgs(args, recursive)); - } - else - { - bool usingStdin = args.length == 1; - if (sloc || tokenCount) - { - if (usingStdin) - { - auto tokens = byToken!(ubyte[], false, false)(readStdin()); - if (tokenCount) - printTokenCount(stdout, "stdin", tokens); - else - printLineCount(stdout, "stdin", tokens); - } - else - { - ulong count; - foreach (f; expandArgs(args, recursive)) - { - auto tokens = byToken!(ubyte[])(readFile(f)); - if (tokenCount) - count += printTokenCount(stdout, f, tokens); - else - count += printLineCount(stdout, f, tokens); - } - writefln("total:\t%d", count); - } - } - else if (syntaxCheck) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - } - else if (imports) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto visitor = new ImportPrinter; - visitor.visit(mod); - } - else if (ast) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto printer = new XMLPrinter; - printer.output = stdout; - printer.visit(mod); - } - else if (outline) - { - auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); - auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); - auto outliner = new Outliner(stdout); - outliner.visit(mod); - } - } - return 0; -} - -string[] expandArgs(string[] args, bool recursive) -{ - if (recursive) - { - string[] rVal; - foreach (arg; args[1 ..$]) - { - if (isFile(arg) && arg.endsWith(`.d`) || arg.endsWith(`.di`)) - rVal abstract arg; - else foreach (item; dirEntries(arg, SpanMode.breadth).map!(a => a.name)) - { - if (isFile(item) && (item.endsWith(`.d`) || item.endsWith(`.di`))) - rVal abstract item; - else - continue; - } - } - return rVal; - } - else - return args[1 .. $]; -} - -ubyte[] readStdin() -{ - auto sourceCode = appender!(ubyte[])(); - ubyte[4096] buf; - while (true) - { - auto b = stdin.rawRead(buf); - if (b.length == 0) - break; - sourceCode.put(b); - } - return sourceCode.data; -} - -ubyte[] readFile(string fileName) -{ - if (!exists(fileName)) - { - stderr.writefln("%s does not exist", fileName); - return []; - } - File f = File(fileName); - ubyte[] sourceCode = uninitializedArray!(ubyte[])(to!size_t(f.size)); - f.rawRead(sourceCode); - return sourceCode; -} - -void printHelp(string programName) -{ - stderr.writefln( -` - Usage: %s options - -options: - --help | -h - Prints this help message - - --sloc | -l [sourceFiles] - Prints the number of logical lines of code in the given - source files. If no files are specified, input is read from stdin. - - --tokenCount | t [sourceFiles] - Prints the number of tokens in the given source files. If no files are - specified, input is read from stdin. - - --highlight [sourceFile] - Syntax-highlight the given source file. The - resulting HTML will be written to standard output. If no files are - specified, input is read from stdin. - - --imports | -i [sourceFile] - Prints modules imported by the given source file. If no files are - specified, input is read from stdin. - - --syntaxCheck | -s [sourceFile] - Lexes and parses sourceFile, printing the line and column number of any - syntax errors to stdout. One error or warning is printed per line. - If no files are specified, input is read from stdin. - - --ctags | -c sourceFile - Generates ctags information from the given source code file. Note that - ctags information requires a filename, so stdin cannot be used in place - of a filename. - - --ast | --xml sourceFile - Generates an XML representation of the source files abstract syntax - tree. If no files are specified, input is read from stdin. - - --recursive | -R | -r - When used with --ctags, --tokenCount, or --sloc, dscanner will produce - ctags output for all .d and .di files contained within the given - directories and its sub-directories.`, - programName); -} -- diff --git a/stdx/d/ast.d b/stdx/d/ast.d index 691ce97..ba948d0 100644 --- a/stdx/d/ast.d +++ b/stdx/d/ast.d @@ -1203,13 +1203,13 @@ class ForStatement : ASTNode public: override void accept(ASTVisitor visitor) { - mixin (visitIfNotNull!(declarationOrStatement, test, increment, - statementNoCaseNoDefault)); + mixin (visitIfNotNull!(initialization, test, increment, + declarationOrStatement)); } - /** */ DeclarationOrStatement declarationOrStatement; + /** */ DeclarationOrStatement initialization; /** */ ExpressionStatement test; /** */ Expression increment; - /** */ StatementNoCaseNoDefault statementNoCaseNoDefault; + /** */ DeclarationOrStatement declarationOrStatement; /** */ size_t startIndex; } @@ -2760,11 +2760,11 @@ class WhileStatement : ASTNode public: override void accept(ASTVisitor visitor) { - mixin (visitIfNotNull!(expression, statementNoCaseNoDefault)); + mixin (visitIfNotNull!(expression, declarationOrStatement)); } /** */ Expression expression; - /** */ StatementNoCaseNoDefault statementNoCaseNoDefault; + /** */ DeclarationOrStatement declarationOrStatement; /** */ size_t startIndex; } diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index f185d37..ed0bf56 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -17,7 +17,7 @@ private enum staticTokens = [ private enum pseudoTokens = [ "\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6", - "7", "8", "9", "#", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", + "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", "\u2028", "\u2029" ]; @@ -57,24 +57,24 @@ public template tok(string token) } public alias stdx.lexer.TokenStructure!(IdType) Token; -public auto byToken(R, bool skipComments = true, bool skipWhitespace = true)(R range) -{ - pure nothrow bool isNotComment(const Token t) { return t.type != tok!"comment"; } - pure nothrow bool isNotWhitespace(const Token t) { return t.type != tok!"whitespace"; } - pure nothrow bool isNotEither(const Token t) { return t.type != tok!"whitespace" && t.type != tok!"comment"; } - - static if (skipComments) - { - static if (skipWhitespace) - return DLexer!(R)(range).filter!isNotEither; - else - return DLexer!(R)(range).filter!isNotComment; - } - else static if (skipWhitespace) - return DLexer!(R)(range).filter!isNotWhitespace; - else - return DLexer!(R)(range); -} +//public auto byToken(R, bool skipComments = true, bool skipWhitespace = true)(R range) +//{ +// pure nothrow bool isNotComment(const Token t) { return t.type != tok!"comment"; } +// pure nothrow bool isNotWhitespace(const Token t) { return t.type != tok!"whitespace"; } +// pure nothrow bool isNotEither(const Token t) { return t.type != tok!"whitespace" && t.type != tok!"comment"; } +// return new DLexer!(R)(range); +// static if (skipComments) +// { +// static if (skipWhitespace) +// return filter!isNotEither(tokens); +// else +// return filter!isNotComment(tokens); +// } +// else static if (skipWhitespace) +// return filter!isNotWhitespace(tokens); +// else +// return tokens; +//} public bool isBasicType(IdType type) nothrow pure @safe { @@ -322,45 +322,50 @@ public struct DLexer(R) { import std.conv; import core.vararg; - - mixin Lexer!(R, IdType, Token, isSeparating, lexIdentifier, staticTokens, - dynamicTokens, pseudoTokens, possibleDefaultTokens); + import dpick.buffer.buffer; + + private enum pseudoTokenHandlers = [ + "\"", "lexStringLiteral", + "`", "lexWysiwygString", + "//", "lexSlashSlashComment", + "/*", "lexSlashStarComment", + "/+", "lexSlashPlusComment", + ".", "lexDot", + "'", "lexCharacterLiteral", + "0", "lexNumber", + "1", "lexNumber", + "2", "lexNumber", + "3", "lexNumber", + "4", "lexNumber", + "5", "lexNumber", + "6", "lexNumber", + "7", "lexNumber", + "8", "lexNumber", + "9", "lexNumber", + "q\"", "lexDelimitedString", + "q{", "lexTokenString", + "r\"", "lexWysiwygString", + "x\"", "lexHexString", + " ", "lexWhitespace", + "\t", "lexWhitespace", + "\r", "lexWhitespace", + "\n", "lexWhitespace", + "\u2028", "lexLongNewline", + "\u2029", "lexLongNewline", + "#!", "lexScriptLine" + ]; + + mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens, + dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens); + + private alias typeof(range).Mark Mark; this(R range) { - registerPostProcess!"\""(&lexStringLiteral); - registerPostProcess!"`"(&lexWysiwygString); - registerPostProcess!"//"(&lexSlashSlashComment); - registerPostProcess!"/*"(&lexSlashStarComment); - registerPostProcess!"/+"(&lexSlashPlusComment); - registerPostProcess!"."(&lexDot); - registerPostProcess!"'"(&lexCharacterLiteral); - registerPostProcess!"0"(&lexNumber); - registerPostProcess!"1"(&lexNumber); - registerPostProcess!"2"(&lexNumber); - registerPostProcess!"3"(&lexNumber); - registerPostProcess!"4"(&lexNumber); - registerPostProcess!"5"(&lexNumber); - registerPostProcess!"6"(&lexNumber); - registerPostProcess!"7"(&lexNumber); - registerPostProcess!"8"(&lexNumber); - registerPostProcess!"9"(&lexNumber); - registerPostProcess!"#"(&lexNumber); - registerPostProcess!"q\""(&lexDelimitedString); - registerPostProcess!"q{"(&lexTokenString); - registerPostProcess!"r\""(&lexWysiwygString); - registerPostProcess!"x\""(&lexHexString); - registerPostProcess!" "(&lexWhitespace); - registerPostProcess!"\t"(&lexWhitespace); - registerPostProcess!"\r"(&lexWhitespace); - registerPostProcess!"\n"(&lexWhitespace); - registerPostProcess!"\u2028"(&lexLongNewline); - registerPostProcess!"\u2029"(&lexLongNewline); - this.range = RangeType(range); - popFront(); + this.range = LexerRange!(typeof(buffer(range)))(buffer(range)); } - bool isWhitespace() pure const nothrow + bool isWhitespace() pure /*const*/ nothrow { switch (range.front) { @@ -370,10 +375,10 @@ public struct DLexer(R) case '\t': return true; case 0xe2: - if (!range.canPeek(2)) - return false; - return range.peek() == 0x80 - && (range.peek(2) == 0xa8 || range.peek(2) == 0xa9); + auto peek = range.lookahead(2); + return peek.length == 2 + && peek[0] == 0x80 + && (peek[1] == 0xa8 || peek[1] == 0xa9); default: return false; } @@ -398,8 +403,9 @@ public struct DLexer(R) range.incrementLine(); return; case 0xe2: - if (range.canPeek(2) && range.peek() == 0x80 - && (range.peek(2) == 0xa8 || range.peek(2) == 0xa9)) + auto lookahead = range.lookahead(3); + if (lookahead.length == 3 && lookahead[1] == 0x80 + && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9)) { range.popFront(); range.popFront(); @@ -420,7 +426,7 @@ public struct DLexer(R) Token lexWhitespace() pure nothrow { - range.mark(); + auto mark = range.mark(); loop: do { switch (range.front) @@ -440,11 +446,12 @@ public struct DLexer(R) range.popFront(); break; case 0xe2: - if (!range.canPeek(2)) + auto lookahead = range.lookahead(3); + if (lookahead.length != 3) break loop; - if (range.peek() != 0x80) + if (lookahead[1] != 0x80) break loop; - if (range.peek(2) == 0xa8 || range.peek(2) == 0xa9) + if (lookahead[2] == 0xa8 || lookahead[2] == 0xa9) { range.popFront(); range.popFront(); @@ -457,36 +464,43 @@ public struct DLexer(R) break loop; } } while (!range.empty); - return Token(tok!"whitespace", cast(string) range.getMarked(), range.line, + return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexNumber() pure nothrow { - range.mark(); - if (range.front == '0') + auto mark = range.mark(); + auto lookahead = range.lookahead(1); + if (range.front == '0' && lookahead.length == 1) { - switch (range.peek()) + switch (lookahead[0]) { case 'x': case 'X': range.popFront(); range.popFront(); - return lexHex(); + return lexHex(mark); case 'b': case 'B': range.popFront(); range.popFront(); - return lexBinary(); + return lexBinary(mark); default: - return lexDecimal(); + return lexDecimal(mark); } } else - return lexDecimal(); + return lexDecimal(mark); } Token lexHex() pure nothrow + { + auto mark = range.mark(); + return lexHex(mark); + } + + Token lexHex(Mark mark) pure nothrow { IdType type = tok!"intLiteral"; bool foundDot; @@ -526,7 +540,7 @@ public struct DLexer(R) case '.': if (foundDot) break hexLoop; - if (range.canPeek() && range.peek() == '.') + if (range.lookahead(1).length && range.lookahead(1)[0] == '.') break hexLoop; range.popFront(); foundDot = true; @@ -536,11 +550,17 @@ public struct DLexer(R) break hexLoop; } } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexBinary() pure nothrow + { + auto mark = range.mark(); + return lexBinary(mark); + } + + Token lexBinary(Mark mark) pure nothrow { IdType type = tok!"intLiteral"; binaryLoop: while (!range.empty) @@ -561,11 +581,11 @@ public struct DLexer(R) break binaryLoop; } } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } - Token lexDecimal() pure nothrow + Token lexDecimal(Mark mark) pure nothrow { bool foundDot = range.front == '.'; IdType type = tok!"intLiteral"; @@ -608,16 +628,17 @@ public struct DLexer(R) case '.': if (foundDot) break decimalLoop; - if (range.canPeek() && range.peek() == '.') + auto lookahead = range.lookahead(1); + if (lookahead.length == 1 && lookahead[0] == '.') break decimalLoop; else { // The following bit of silliness tries to tell the // difference between "int dot identifier" and // "double identifier". - if (range.canPeek()) + if (lookahead.length == 1) { - switch (range.peek()) + switch (lookahead[0]) { case '0': .. case '9': goto doubleLiteral; @@ -638,7 +659,7 @@ public struct DLexer(R) break decimalLoop; } } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } @@ -749,7 +770,7 @@ public struct DLexer(R) Token lexSlashStarComment() pure { - range.mark(); + auto mark = range.mark(); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -767,13 +788,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexSlashSlashComment() pure nothrow { - range.mark(); + auto mark = range.mark(); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -783,13 +804,13 @@ public struct DLexer(R) break; range.popFront(); } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexSlashPlusComment() pure nothrow { - range.mark(); + auto mark = range.mark(); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -817,13 +838,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexStringLiteral() pure nothrow { - range.mark(); + auto mark = range.mark(); range.popFront(); while (true) { @@ -846,13 +867,13 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexWysiwygString() pure nothrow { - range.mark(); + auto mark = range.mark(); IdType type = tok!"stringLiteral"; bool backtick = range.front == '`'; if (backtick) @@ -900,7 +921,7 @@ public struct DLexer(R) } } lexStringSuffix(type); - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } @@ -922,7 +943,7 @@ public struct DLexer(R) Token lexDelimitedString() pure nothrow { - range.mark(); + auto mark = range.mark(); range.popFront(); range.popFront(); ElementEncodingType!R open; @@ -933,29 +954,29 @@ public struct DLexer(R) open = '<'; close = '>'; range.popFront(); - return lexNormalDelimitedString(open, close); + return lexNormalDelimitedString(mark, open, close); case '{': open = '{'; close = '}'; range.popFront(); - return lexNormalDelimitedString(open, close); + return lexNormalDelimitedString(mark, open, close); case '[': open = '['; close = ']'; range.popFront(); - return lexNormalDelimitedString(open, close); + return lexNormalDelimitedString(mark, open, close); case '(': open = '('; close = ')'; range.popFront(); - return lexNormalDelimitedString(open, close); + return lexNormalDelimitedString(mark, open, close); default: return lexHeredocString(); } } - Token lexNormalDelimitedString(ElementEncodingType!RangeType open, - ElementEncodingType!RangeType close) pure nothrow + Token lexNormalDelimitedString(Mark mark, ElementEncodingType!R open, + ElementEncodingType!R close) pure nothrow { int depth = 1; while (!range.empty && depth > 0) @@ -985,7 +1006,7 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.getMarked(), range.line, range.column, range.index); + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } Token lexHeredocString() pure nothrow @@ -1024,7 +1045,7 @@ public struct DLexer(R) Token lexHexString() pure nothrow { - range.mark(); + auto mark = range.mark(); range.popFront(); range.popFront(); @@ -1055,7 +1076,7 @@ public struct DLexer(R) IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.getMarked(), range.line, range.column, + return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); } @@ -1154,7 +1175,7 @@ public struct DLexer(R) Token lexCharacterLiteral() pure nothrow { - range.mark(); + auto mark = range.mark(); range.popFront(); if (range.front == '\\') { @@ -1164,7 +1185,7 @@ public struct DLexer(R) else if (range.front == '\'') { range.popFront(); - return Token(tok!"characterLiteral", cast(string) range.getMarked(), + return Token(tok!"characterLiteral", cast(string) range.slice(mark), range.line, range.column, range.index); } else if (range.front & 0x80) @@ -1182,7 +1203,7 @@ public struct DLexer(R) if (range.front == '\'') { range.popFront(); - return Token(tok!"characterLiteral", cast(string) range.getMarked(), + return Token(tok!"characterLiteral", cast(string) range.slice(mark), range.line, range.column, range.index); } else @@ -1194,30 +1215,31 @@ public struct DLexer(R) Token lexIdentifier() pure nothrow { - range.mark(); + auto mark = range.mark(); while (!range.empty && !isSeparating(range.front)) { range.popFront(); } - return Token(tok!"identifier", cast(string) range.getMarked(), range.index, + return Token(tok!"identifier", cast(string) range.slice(mark), range.index, range.line, range.column); } Token lexDot() pure nothrow { - if (!range.canPeek) + auto lookahead = range.lookahead(1); + if (lookahead.length == 0) { range.popFront(); return Token(tok!".", null, range.line, range.column, range.index); } - switch (range.peek()) + switch (lookahead[0]) { case '0': .. case '9': return lexNumber(); case '.': range.popFront(); range.popFront(); - if (range.front == '.') + if (!range.empty && range.front == '.') { range.popFront(); return Token(tok!"...", null, range.line, range.column, range.index); @@ -1232,16 +1254,21 @@ public struct DLexer(R) Token lexLongNewline() pure nothrow { - range.mark(); + auto mark = range.mark(); range.popFront(); range.popFront(); range.popFront(); range.incrementLine(); - return Token(tok!"whitespace", cast(string) range.getMarked(), range.line, + return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, range.column, range.index); } + + Token lexScriptLine() pure nothrow + { + assert(false, "Not implemented"); + } - bool isSeparating(C)(C c) nothrow pure @safe + bool isSeparating(ElementType!R c) nothrow pure @safe { if (c <= 0x2f) return true; if (c >= ':' && c <= '@') return true; diff --git a/stdx/d/parser.d b/stdx/d/parser.d index 2519171..d80f361 100644 --- a/stdx/d/parser.d +++ b/stdx/d/parser.d @@ -1,62 +1,5 @@ // Written in the D programming language -/** - * This module contains a _parser for D source code. - * - * Grammar: - * The grammar format used in the documentation of this module generally follows - * the format used by the ANTLR _parser generator. - * $(UL - * $(LI Tokens and rules can be grouped by parenthesis.) - * $(LI An asterisk (*) indicates that the previous rule, token, or group - * can repeat 0 or more times.) - * $(LI A question mark (?) indicates that the previous rule, token, or group - * will be present either 0 or 1 times.) - * $(LI A plus sign (+) indicates that the previous rule, token, or group - * repeats one or more times. (i.e. it is optional)) - * $(LI If there is more than one way to match a rule, the alternatives will be - * separated by a pipe character (|).) - * $(LI Rule definitions begin with the rule name followed by a colon (:). Rule - * definitions end with a semicolon (;).) - * ) - * - * The grammar for D starts with the $(LINK2 #module, module) rule. - * - * Examples: - * --- - * import std.d.lexer; - * import std.d.parser; - * import std.d.ast; - * import std.array; - * - * string sourceCode = q{ - * import std.stdio; - * - * void main() - * { - * writeln("Hello, World."); - * } - * }c; - * void main() - * { - * LexerConfig config; - * auto tokens = byToken(cast(ubyte[]) sourceCode, config).array(); - * Module mod = parseModule(tokens); - * // Use module here... - * } - * --- - * - * Copyright: Brian Schott 2013 - * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) - * Authors: Brian Schott - * Source: $(PHOBOSSRC std/d/_parser.d) - * Macros: - * GRAMMAR =$0- * RULEDEF = $0 - * RULE = $0 - * LITERAL = $0 - */ - module stdx.d.parser; import stdx.d.lexer; @@ -2162,7 +2105,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; * Parses a ForStatement * * $(GRAMMAR $(RULEDEF forStatement): - * $(LITERAL 'for') $(LITERAL '$(LPAREN)') $(RULE declarationOrStatement) $(RULE expression)? $(LITERAL ';') $(RULE expression)? $(LITERAL '$(RPAREN)') $(RULE statementNoCaseNoDefault) + * $(LITERAL 'for') $(LITERAL '$(LPAREN)') $(RULE declarationOrStatement) $(RULE expression)? $(LITERAL ';') $(RULE expression)? $(LITERAL '$(RPAREN)') $(RULE declarationOrStatement) * ;) */ ForStatement parseForStatement() @@ -2176,7 +2119,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; if (currentIs(tok!";")) advance(); else - node.declarationOrStatement = parseDeclarationOrStatement(); + node.initialization = parseDeclarationOrStatement(); if (currentIs(tok!";")) advance(); @@ -2192,8 +2135,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; error("Statement expected", false); return node; // this line makes DCD better } - node.statementNoCaseNoDefault = parseStatementNoCaseNoDefault(); - if (node.statementNoCaseNoDefault is null) return null; + node.declarationOrStatement = parseDeclarationOrStatement(); + if (node.declarationOrStatement is null) return null; return node; } @@ -5917,7 +5860,7 @@ q{doStuff(5)}c; error("Statement expected", false); return node; // this line makes DCD better } - node.statementNoCaseNoDefault = parseStatementNoCaseNoDefault(); + node.declarationOrStatement = parseDeclarationOrStatement(); return node; } diff --git a/stdx/lexer.d b/stdx/lexer.d index da94701..980e530 100644 --- a/stdx/lexer.d +++ b/stdx/lexer.d @@ -10,12 +10,15 @@ */ module stdx.lexer; + import std.typecons; import std.algorithm; import std.range; import std.traits; import std.conv; import std.math; +import dpick.buffer.buffer; +import dpick.buffer.traits; template TokenIdType(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens) @@ -34,12 +37,12 @@ string TokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens { if (type == 0) return "!ERROR!"; - else if (type < staticTokens.length) + else if (type < staticTokens.length + 1) return staticTokens[type - 1]; - else if (type < staticTokens.length + possibleDefaultTokens.length) - return possibleDefaultTokens[type - staticTokens.length]; - else if (type < staticTokens.length + possibleDefaultTokens.length + dynamicTokens.length) - return dynamicTokens[type - staticTokens.length - possibleDefaultTokens.length]; + else if (type < staticTokens.length + possibleDefaultTokens.length + 1) + return possibleDefaultTokens[type - staticTokens.length - 1]; + else if (type < staticTokens.length + possibleDefaultTokens.length + dynamicTokens.length + 1) + return dynamicTokens[type - staticTokens.length - possibleDefaultTokens.length - 1]; else return null; } @@ -70,14 +73,16 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens, enum ii = possibleDefaultTokens.countUntil(symbol); static if (ii >= 0) { - enum id = ii + staticTokens.length; + enum id = ii + staticTokens.length + 1; static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol); alias id TokenId; } else { enum dynamicId = dynamicTokens.countUntil(symbol); - enum id = dynamicId >= 0 ? i + staticTokens.length + possibleDefaultTokens.length + dynamicId : -1; + enum id = dynamicId >= 0 + ? i + staticTokens.length + possibleDefaultTokens.length + dynamicId + 1 + : -1; static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol); alias id TokenId; } @@ -113,13 +118,10 @@ struct TokenStructure(IDType) IDType type; } -mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFunction, +mixin template Lexer(R, IDType, Token, alias defaultTokenFunction, alias staticTokens, alias dynamicTokens, alias pseudoTokens, - alias possibleDefaultTokens) if (isForwardRange!R) + alias pseudoTokenHandlers, alias possibleDefaultTokens) { - enum size_t lookAhead = chain(staticTokens, pseudoTokens).map!"a.length".reduce!"max(a, b)"(); - alias PeekRange!(R, lookAhead) RangeType; - static string generateCaseStatements(string[] tokens, size_t offset = 0) { string code; @@ -141,9 +143,9 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun code ~= generateLeaf(tokens[i], indent ~ " "); else { - code ~= indent ~ " if (!range.canPeek(" ~ text(tokens[i].length - 1) ~ "))\n"; + code ~= indent ~ " if (range.lookahead(" ~ text(tokens[i].length) ~ ").length == 0)\n"; code ~= indent ~ " goto outer_default;\n"; - code ~= indent ~ " if (range.startsWith(\"" ~ escape(tokens[i]) ~ "\"))\n"; + code ~= indent ~ " if (range.lookahead(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~ "\")\n"; code ~= indent ~ " {\n"; code ~= generateLeaf(tokens[i], indent ~ " "); code ~= indent ~ " }\n"; @@ -153,11 +155,11 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun } else { - code ~= indent ~ " if (!range.canPeek(" ~ text(offset + 1) ~ "))\n"; + code ~= indent ~ " if (range.lookahead(" ~ text(offset + 2) ~ ").length == 0)\n"; code ~= indent ~ " {\n"; code ~= generateLeaf(tokens[i][0 .. offset + 1], indent ~ " "); code ~= indent ~ " }\n"; - code ~= indent ~ " switch (range.peek(" ~ text(offset + 1) ~ "))\n"; + code ~= indent ~ " switch (range.lookahead(" ~ text(offset + 2) ~ ")[" ~ text(offset + 1) ~ "])\n"; code ~= indent ~ " {\n"; code ~= generateCaseStatements(tokens[i .. j], offset + 1); code ~= indent ~ " default:\n"; @@ -172,6 +174,8 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun static string generateLeaf(string token, string indent) { + static assert (pseudoTokenHandlers.length % 2 == 0, + "Each pseudo-token must have a matching function name."); string code; if (staticTokens.countUntil(token) >= 0) { @@ -179,13 +183,13 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun code ~= indent ~ "range.popFront();\n"; else code ~= indent ~ "range.popFrontN(" ~ text(token.length) ~ ");\n"; - code ~= indent ~ "return Token(tok!\"" ~ escape(token) ~"\", null, range.line, range.column, range.index);\n"; + code ~= indent ~ "return Token(tok!\"" ~ escape(token) ~ "\", null, range.line, range.column, range.index);\n"; } else if (pseudoTokens.countUntil(token) >= 0) - code ~= indent ~ "return postProcess(pseudoTok!\"" ~ escape(token) ~"\");\n"; + code ~= indent ~ "return " ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1] ~ "();\n"; else if (possibleDefaultTokens.countUntil(token) >= 0) { - code ~= indent ~ "if (!range.canPeek(" ~ text(token.length) ~ ") || isSeparating(range.peek(" ~ text(token.length) ~ ")))\n"; + code ~= indent ~ "if (range.lookahead(" ~ text(token.length + 1) ~ ").length == 0 || isSeparating(range.lookahead(" ~ text(token.length + 1) ~ ")[" ~ text(token.length) ~ "]))\n"; code ~= indent ~ "{\n"; if (token.length == 1) code ~= indent ~ " range.popFront();\n"; @@ -211,18 +215,11 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun _front = advance(); } - bool empty() const nothrow @property + bool empty() pure const nothrow @property { return _front.type == tok!"\0"; } - template pseudoTok(string symbol) - { - static assert (pseudoTokens.countUntil(symbol) >= 0); - enum index = cast(IDType) pseudoTokens.countUntil(symbol); - alias index pseudoTok; - } - static string escape(string input) { string rVal; @@ -267,224 +264,36 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun return rVal; } - void registerPostProcess(alias t)(Token delegate() pure fun) - { - post[pseudoTok!t] = fun; - } - - Token postProcess(IDType i) pure - { - assert (post[i] !is null, "No post-processing function registered for " ~ pseudoTokens[i]); - return post[i](); - } - - Token delegate() pure [pseudoTokens.length] post; - RangeType range; + LexerRange!(typeof(buffer(R.init))) range; Token _front; } -struct PeekRange(R, size_t peekSupported = 1) if (isRandomAccessRange!R - && isForwardRange!R && hasSlicing!R) +struct LexerRange(BufferType) if (isBuffer!BufferType) { -public: - - this(R range) + this(BufferType r) { - this.range = range; + this.range = r; + index = 0; + column = 1; + line = 1; } - invariant() + void popFront() pure { - import std.string; - if (range.length != 6190) - assert (false, format("range.length = %d %s", range.length, cast(char[]) range[0 .. 100])); + index++; + column++; + range.popFront(); } - bool startsWith(string s) - { - return index + s.length < range.length - && (cast(const(ubyte[])) s) == range[index .. index + s.length]; - } - - bool empty() pure nothrow const @property - { - return _index >= range.length; - } - - const(ElementType!R) front() pure nothrow const @property - in - { - assert (!empty); - } - body - { - return range[_index]; - } - - void popFront() pure nothrow - { - _index++; - _column++; - } - - void popFrontN(size_t n) pure nothrow - { - foreach (i; 0 .. n) - popFront(); - } - - const(ElementType!R) peek(int offset = 1) pure nothrow const - in - { - assert (canPeek(offset)); - } - body - { - return range[_index + offset]; - } - - bool canPeek(size_t offset = 1) pure nothrow const - { - return _index + offset < range.length; - } - - void mark() nothrow pure - { - markBegin = _index; - } - - const(R) getMarked() pure nothrow const - { - return range[markBegin .. _index]; - } - void incrementLine() pure nothrow { - _column = 1; - _line++; + column = 1; + line++; } - - size_t line() pure nothrow const @property { return _line; } - size_t column() pure nothrow const @property { return _column; } - size_t index() pure nothrow const @property { return _index; } - -private: - size_t markBegin; - size_t _column = 1; - size_t _line = 1; - size_t _index = 0; - R range; + + BufferType range; + alias range this; + size_t index; + size_t column; + size_t line; } - -//struct PeekRange(R, size_t peekSupported = 1) -// if (!isRandomAccessRange!R && isForwardRange!R) -//{ -//public: -// -// this(R range) -// { -// this.range = range; -// for (size_t i = 0; !this.range.empty && i < peekSupported; i++) -// { -// rangeSizeCount++; -// buffer[i] = this.range.front; -// range.popFront(); -// } -// } -// -// ElementType!R front() const @property -// in -// { -// assert (!empty); -// } -// body -// { -// return buffer[bufferIndex]; -// } -// -// void popFront() -// in -// { -// assert (!empty); -// } -// body -// { -// index++; -// column++; -// count++; -// bufferIndex = bufferIndex + 1 > buffer.length ? 0 : bufferIndex + 1; -// if (marking) -// markBuffer.put(buffer[bufferIndex]); -// if (!range.empty) -// { -// buffer[bufferIndex + peekSupported % buffer.length] = range.front(); -// range.popFront(); -// rangeSizeCount++; -// } -// } -// -// bool empty() const nothrow pure @property -// { -// return rangeSizeCount == count; -// } -// -// ElementType!R peek(int offset = 1) pure nothrow const -// in -// { -// assert (canPeek(offset)); -// } -// body -// { -// return buffer[(bufferIndex + offset) % buffer.length]; -// } -// -// bool canPeek(size_t int offset = 1) pure nothrow const -// { -// return offset <= peekSupported && count + offset <= rangeSizeCount; -// } -// -// typeof(this) save() @property -// { -// typeof(this) newRange; -// newRange.count = count; -// newRange.rangeSizeCount = count; -// newRange.buffer = buffer.dup; -// newRange.bufferIndex = bufferIndex; -// newRange.range = range.save; -// return newRange; -// } -// -// void mark() -// { -// marking = true; -// markBuffer.clear(); -// } -// -// ElementEncodingType!R[] getMarked() -// { -// marking = false; -// return markBuffer.data; -// } -// -// void incrementLine() pure nothrow -// { -// _column = 1; -// _line++; -// } -// -// size_t line() pure nothrow const @property { return _line; } -// size_t column() pure nothrow const @property { return _column; } -// size_t index() pure nothrow const @property { return _index; } -// -//private: -// auto markBuffer = appender!(ElementType!R[])(); -// bool marking; -// size_t count; -// size_t rangeSizeCount; -// ElementType!(R)[peekSupported + 1] buffer; -// size_t bufferIndex; -// size_t _column = 1; -// size_t _line = 1; -// size_t _index = 0; -// R range; -//}