From 070f9ac83b448911c192cd0be2b6e31d313c6664 Mon Sep 17 00:00:00 2001
From: Hackerpilot
Date: Sun, 12 Jan 2014 02:45:37 +0000
Subject: [PATCH] Finished conversion to new lexer

---
 ctags.d         |   7 +-
 datapicked      |   2 +-
 main.d          |   2 +-
 stdx/d/ast.d    |   2 +-
 stdx/d/lexer.d  | 195 +++++++++++++++++++++++++++++-------------------
 stdx/d/parser.d |  53 +++++++++++--
 6 files changed, 172 insertions(+), 89 deletions(-)

diff --git a/ctags.d b/ctags.d
index db633df..7000833 100644
--- a/ctags.d
+++ b/ctags.d
@@ -24,7 +24,7 @@ void printCtags(File output, string[] fileNames)
 		File f = File(fileName);
 		auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
 		f.rawRead(bytes);
-		auto tokens = DLexer!(typeof(bytes))(bytes);
+		auto tokens = byToken(bytes);
 		Module m = parseModule(tokens.array, fileName, &doNothing);
 		auto printer = new CTagsPrinter;
 		printer.fileName = fileName;
@@ -40,9 +40,6 @@ void printCtags(File output, string[] fileNames)
 
 class CTagsPrinter : ASTVisitor
 {
-
-	alias ASTVisitor.visit visit;
-
 	override void visit(ClassDeclaration dec)
 	{
 		tagLines ~= "%s\t%s\t%d;\"\tc%s\n".format(dec.name.text, fileName, dec.name.line, context);
@@ -134,6 +131,8 @@ class CTagsPrinter : ASTVisitor
 		}
 		dec.accept(this);
 	}
+
+	alias ASTVisitor.visit visit;
 
 	string fileName;
 	string[] tagLines;
diff --git a/datapicked b/datapicked
index eb14a52..f63a843 160000
--- a/datapicked
+++ b/datapicked
@@ -1 +1 @@
-Subproject commit eb14a5244153c0e13ceca79f292838dfe2ac9bfb
+Subproject commit f63a843e9c0ce8db7fd897684fe323697255d87d
diff --git a/main.d b/main.d
index 6ec2759..5ddf6a0 100644
--- a/main.d
+++ b/main.d
@@ -234,7 +234,7 @@ options:
         Prints the number of logical lines of code in the given
         source files. If no files are specified, input is read from stdin.
 
-    --tokenCount | t [sourceFiles]
+    --tokenCount | -t [sourceFiles]
         Prints the number of tokens in the given source files. If
        no files are specified, input is read from stdin.
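The functional core of the conversion is visible in the ctags.d hunk above: token streams now come from the free function byToken rather than from constructing DLexer!(typeof(bytes)) directly. What follows is a minimal usage sketch, not part of the patch itself; it assumes the stdx.d.* module layout this repository uses, lexAndParse and ignoreMessage are illustrative names only, and the message-callback signature is taken from the doNothingErrorFunction declared later in this patch.

import std.array : array;
import std.file : read;

import stdx.d.ast : Module;
import stdx.d.lexer : byToken;
import stdx.d.parser : parseModule;

// Hypothetical helper mirroring the updated printCtags flow in ctags.d.
Module lexAndParse(string fileName)
{
	auto bytes = cast(ubyte[]) read(fileName);   // raw source bytes
	auto tokens = byToken(bytes);                // new entry point, replaces DLexer!(typeof(bytes))(bytes)
	return parseModule(tokens.array(), fileName, &ignoreMessage);
}

// No-op message callback, analogous to the &doNothing argument in ctags.d.
void ignoreMessage(string fileName, size_t line, size_t column, string message)
{
}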
diff --git a/stdx/d/ast.d b/stdx/d/ast.d index ba948d0..adf4ccc 100644 --- a/stdx/d/ast.d +++ b/stdx/d/ast.d @@ -943,7 +943,7 @@ public: destructor, staticConstructor, staticDestructor, sharedStaticDestructor, sharedStaticConstructor, conditionalDeclaration, pragmaDeclaration, versionSpecification, - declarations)); + invariant_, postblit, declarations)); } /** */ Attribute[] attributes; diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index 735054a..08c6e0c 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -18,7 +18,7 @@ private enum staticTokens = [ private enum pseudoTokens = [ "\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", - "\u2028", "\u2029" + "#line", "\u2028", "\u2029" ]; private enum possibleDefaultTokens = [ @@ -343,15 +343,15 @@ public struct DLexer(R) ".", "lexDot", "'", "lexCharacterLiteral", "0", "lexNumber", - "1", "lexNumber", - "2", "lexNumber", - "3", "lexNumber", - "4", "lexNumber", - "5", "lexNumber", - "6", "lexNumber", - "7", "lexNumber", - "8", "lexNumber", - "9", "lexNumber", + "1", "lexDecimal", + "2", "lexDecimal", + "3", "lexDecimal", + "4", "lexDecimal", + "5", "lexDecimal", + "6", "lexDecimal", + "7", "lexDecimal", + "8", "lexDecimal", + "9", "lexDecimal", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", @@ -362,7 +362,8 @@ public struct DLexer(R) "\n", "lexWhitespace", "\u2028", "lexLongNewline", "\u2029", "lexLongNewline", - "#!", "lexScriptLine" + "#!", "lexScriptLine", + "#line", "lexSpecialTokenSequence" ]; mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens, @@ -437,7 +438,7 @@ public struct DLexer(R) Token lexWhitespace() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); loop: do { switch (range.front) @@ -475,13 +476,13 @@ public struct DLexer(R) break loop; } } while (!range.empty); - return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"whitespace", cast(string) range.slice(mark), line, + column, index); } Token lexNumber() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); auto lookahead = range.lookahead(2); if (range.front == '0' && lookahead.length == 2) { @@ -491,27 +492,27 @@ public struct DLexer(R) case 'X': range.popFront(); range.popFront(); - return lexHex(mark); + return lexHex(mark, line, column, index); case 'b': case 'B': range.popFront(); range.popFront(); - return lexBinary(mark); + return lexBinary(mark, line, column, index); default: - return lexDecimal(mark); + return lexDecimal(mark, line, column, index); } } else - return lexDecimal(mark); + return lexDecimal(mark, line, column, index); } Token lexHex() pure nothrow { - auto mark = range.mark(); - return lexHex(mark); + mixin (tokenStart); + return lexHex(mark, line, column, index); } - Token lexHex(Mark mark) pure nothrow + Token lexHex(Mark mark, size_t line, size_t column, size_t index) pure nothrow { IdType type = tok!"intLiteral"; bool foundDot; @@ -556,17 +557,17 @@ public struct DLexer(R) break hexLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexBinary() pure nothrow { - auto mark = range.mark(); - return lexBinary(mark); + mixin (tokenStart); + return lexBinary(mark, line, column, index); } - Token lexBinary(Mark mark) pure nothrow + Token lexBinary(Mark mark, size_t line, size_t column, size_t index) 
pure nothrow { IdType type = tok!"intLiteral"; binaryLoop: while (!range.empty) @@ -587,11 +588,17 @@ public struct DLexer(R) break binaryLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } - Token lexDecimal(Mark mark) pure nothrow + Token lexDecimal() + { + mixin (tokenStart); + return lexDecimal(mark, line, column, index); + } + + Token lexDecimal(Mark mark, size_t line, size_t column, size_t index) pure nothrow { bool foundDot = range.front == '.'; IdType type = tok!"intLiteral"; @@ -665,8 +672,8 @@ public struct DLexer(R) break decimalLoop; } } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } void lexIntSuffix(ref IdType type) pure nothrow @safe @@ -768,15 +775,27 @@ public struct DLexer(R) } } - - Token lexSpecialTokenSequence() pure nothrow @safe + Token lexScriptLine() pure { - assert (false, "Not implemented"); + mixin (tokenStart); + while (!range.empty && !isNewline) + range.popFront(); + return Token(tok!"scriptLine", cast(string) range.slice(mark), + line, column, index); + } + + Token lexSpecialTokenSequence() pure + { + mixin (tokenStart); + while (!range.empty && !isNewline) + range.popFront(); + return Token(tok!"specialTokenSequence", cast(string) range.slice(mark), + line, column, index); } Token lexSlashStarComment() pure { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -794,13 +813,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexSlashSlashComment() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -810,13 +829,13 @@ public struct DLexer(R) break; range.popFront(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexSlashPlusComment() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"comment"; range.popFront(); range.popFront(); @@ -844,13 +863,13 @@ public struct DLexer(R) else popFrontWhitespaceAware(); } - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexStringLiteral() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); while (true) { @@ -873,13 +892,13 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } Token lexWysiwygString() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); IdType type = tok!"stringLiteral"; bool backtick = range.front == '`'; if (backtick) @@ -927,8 +946,8 @@ public struct DLexer(R) } } lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } void 
lexStringSuffix(ref IdType type) pure @@ -950,7 +969,7 @@ public struct DLexer(R) Token lexDelimitedString() pure nothrow { import std.traits; - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); Unqual!(ElementEncodingType!R) open; @@ -961,29 +980,30 @@ public struct DLexer(R) open = '<'; close = '>'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '{': open = '{'; close = '}'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '[': open = '['; close = ']'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); case '(': open = '('; close = ')'; range.popFront(); - return lexNormalDelimitedString(mark, open, close); + return lexNormalDelimitedString(mark, line, column, index, open, close); default: return lexHeredocString(); } } - Token lexNormalDelimitedString(Mark mark, ElementEncodingType!R open, - ElementEncodingType!R close) pure nothrow + Token lexNormalDelimitedString(Mark mark, size_t line, size_t column, + size_t index, ElementEncodingType!R open, ElementEncodingType!R close) + pure nothrow { int depth = 1; while (!range.empty && depth > 0) @@ -1013,7 +1033,7 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); + return Token(type, cast(string) range.slice(mark), line, column, index); } Token lexHeredocString() pure nothrow @@ -1023,6 +1043,7 @@ public struct DLexer(R) Token lexTokenString() pure { + mixin (tokenStart); assert(range.front == 'q'); range.popFront(); assert(range.front == '{'); @@ -1055,12 +1076,12 @@ public struct DLexer(R) } IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, app.data, range.line, range.column, range.index); + return Token(type, app.data, line, column, index); } Token lexHexString() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); @@ -1091,8 +1112,8 @@ public struct DLexer(R) IdType type = tok!"stringLiteral"; lexStringSuffix(type); - return Token(type, cast(string) range.slice(mark), range.line, range.column, - range.index); + return Token(type, cast(string) range.slice(mark), line, column, + index); } bool lexEscapeSequence() pure nothrow @@ -1190,7 +1211,7 @@ public struct DLexer(R) Token lexCharacterLiteral() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); if (range.front == '\\') { @@ -1201,7 +1222,7 @@ public struct DLexer(R) { range.popFront(); return Token(tok!"characterLiteral", cast(string) range.slice(mark), - range.line, range.column, range.index); + line, column, index); } else if (range.front & 0x80) { @@ -1219,7 +1240,7 @@ public struct DLexer(R) { range.popFront(); return Token(tok!"characterLiteral", cast(string) range.slice(mark), - range.line, range.column, range.index); + line, column, index); } else { @@ -1230,22 +1251,23 @@ public struct DLexer(R) Token lexIdentifier() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); while (!range.empty && !isSeparating(range.front)) { range.popFront(); } - return Token(tok!"identifier", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"identifier", cast(string) 
range.slice(mark), line, + column, index); } Token lexDot() pure nothrow { + mixin (tokenStart); auto lookahead = range.lookahead(1); if (lookahead.length == 0) { range.popFront(); - return Token(tok!".", null, range.line, range.column, range.index); + return Token(tok!".", null, line, column, index); } switch (lookahead[0]) { @@ -1257,30 +1279,36 @@ public struct DLexer(R) if (!range.empty && range.front == '.') { range.popFront(); - return Token(tok!"...", null, range.line, range.column, range.index); + return Token(tok!"...", null, line, column, index); } else - return Token(tok!"..", null, range.line, range.column, range.index); + return Token(tok!"..", null, line, column, index); default: range.popFront(); - return Token(tok!".", null, range.line, range.column, range.index); + return Token(tok!".", null, line, column, index); } } Token lexLongNewline() pure nothrow { - auto mark = range.mark(); + mixin (tokenStart); range.popFront(); range.popFront(); range.popFront(); range.incrementLine(); - return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, - range.column, range.index); + return Token(tok!"whitespace", cast(string) range.slice(mark), line, + column, index); } - Token lexScriptLine() pure nothrow + bool isNewline() pure @safe { - assert(false, "Not implemented"); + if (range.front == '\n') return true; + if (range.front == '\r') return true; + auto lookahead = range.lookahead(3); + if (lookahead.length == 0) return false; + if (lookahead.startsWith("\u2028") || lookahead.startsWith("\u2029")) + return true; + return false; } bool isSeparating(ElementType!R c) nothrow pure @safe @@ -1290,10 +1318,23 @@ public struct DLexer(R) if (c >= '[' && c <= '^') return true; if (c >= '{' && c <= '~') return true; if (c == '`') return true; +// if (c & 0x80 && (range.lookahead(3).startsWith("\u2028") +// || range.lookahead(3).startsWith("\u2029"))) return true; return false; } + enum tokenStart = q{ + size_t index = range.index; + size_t column = range.column; + size_t line = range.line; + auto mark = range.mark(); + }; + void error(...) pure { } + + void warning(...) pure { + + } } diff --git a/stdx/d/parser.d b/stdx/d/parser.d index 38ded73..fbdc62d 100644 --- a/stdx/d/parser.d +++ b/stdx/d/parser.d @@ -96,7 +96,6 @@ class Parser unittest { - stderr.writeln("Running unittest for parseAliasDeclaration."); auto sourceCode = q{ alias core.sys.posix.stdio.fileno fileno; @@ -128,6 +127,16 @@ alias core.sys.posix.stdio.fileno fileno; node.type = parseType(); return node; } + + unittest + { + auto sourceCode = q{a = abcde!def}; + Parser p = getParserForUnittest(sourceCode, "parseAliasInitializer"); + auto initializer = p.parseAliasInitializer(); + assert (initializer !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAliasInitializer() passed."); + } /** * Parses an AliasThisDeclaration @@ -147,6 +156,16 @@ alias core.sys.posix.stdio.fileno fileno; if (expect(tok!";") is null) return null; return node; } + + unittest + { + auto sourceCode = q{alias oneTwoThree this;}; + Parser p = getParserForUnittest(sourceCode, "parseAliasThisDeclaration"); + auto aliasThis = p.parseAliasThisDeclaration(); + assert (aliasThis !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAliasThisDeclaration() passed."); + } /** * Parses an AlignAttribute. 
@@ -169,6 +188,18 @@ alias core.sys.posix.stdio.fileno fileno; } return node; } + + unittest + { + auto sourceCode = q{align(42) align}; + Parser p = getParserForUnittest(sourceCode, "parseAlignAttribute"); + auto attribute = p.parseAlignAttribute(); + assert (attribute !is null); + attribute = p.parseAlignAttribute(); + assert (attribute !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseAlignAttribute() passed."); + } /** * Parses an AndAndExpression @@ -3098,6 +3129,16 @@ invariant() foo(); if (expect(tok!")") is null) return null; return node; } + + unittest + { + auto sourceCode = q{is ( x : uybte)}c; + Parser p = getParserForUnittest(sourceCode, "parseIsExpression"); + auto isExp1 = p.parseIsExpression(); + assert (isExp1 !is null); + assert (p.errorCount == 0); + stderr.writeln("Unittest for parseIsExpression passed."); + } /** * Parses a KeyValuePair @@ -3369,6 +3410,10 @@ invariant() foo(); node.symbol = parseSymbol(); return node; } + + unittest + { + } /** * Parses a Module @@ -5951,8 +5996,6 @@ protected: return hasMagicDelimiter!(tok!":")(); } - - bool hasMagicDelimiter(alias T)() { mixin(traceEnterAndExit!(__FUNCTION__)); @@ -6432,14 +6475,14 @@ protected: } version (unittest) static void doNothingErrorFunction(string fileName, - int line, int column, string message) {} + size_t line, size_t column, string message) {} version (unittest) static Parser getParserForUnittest(string sourceCode, string testName) { auto r = byToken(cast(ubyte[]) sourceCode); Parser p = new Parser; - //p.messageFunction = &doNothingErrorFunction; + p.messageFunction = &doNothingErrorFunction; p.fileName = testName ~ ".d"; p.tokens = r.array(); return p;
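The recurring mechanical change in the stdx/d/lexer.d portion of the patch is that every lex* routine now begins with mixin (tokenStart);, snapshotting index, column, line, and a mark before any characters are consumed, and then builds the Token from that snapshot instead of from range.line/range.column/range.index after the fact, which would report the token's end position. Below is a small self-contained sketch of that pattern; FakeRange is a simplified stand-in for illustration, not the real range type from stdx.lexer.

import std.stdio : writeln;

// Simplified stand-in for the lexer's input range; only the members the
// tokenStart mixin touches are modeled here.
struct FakeRange
{
	string source;
	size_t index;
	size_t column = 1;
	size_t line = 1;

	bool empty() const { return index >= source.length; }
	char front() const { return source[index]; }
	void popFront() { index++; column++; }
	size_t mark() const { return index; }
	string slice(size_t mark) const { return source[mark .. index]; }
}

// Same idea as the patch: a string mixin that records where a token starts.
enum tokenStart = q{
	size_t index = range.index;
	size_t column = range.column;
	size_t line = range.line;
	auto mark = range.mark();
};

void main()
{
	auto range = FakeRange("abc  ");
	mixin (tokenStart);                 // snapshot taken before consuming input
	while (!range.empty && range.front != ' ')
		range.popFront();
	// The reported position is the token's start, not the lexer's current spot.
	writeln(range.slice(mark), " at line ", line, ", column ", column);
}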