From 0e3adafd488c7f5e917ebc5fc882ff70d42d9d4e Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Mon, 13 Jan 2014 20:51:42 -0800 Subject: [PATCH] Added ddoc comment handling --- astprinter.d | 20 ++++++-- main.d | 21 ++++++-- stdx/d/ast.d | 16 ++++++ stdx/d/lexer.d | 128 ++++++++++++++++++++++++++++++++++++++---------- stdx/d/parser.d | 45 +++++++++++++---- stdx/lexer.d | 13 ++--- 6 files changed, 195 insertions(+), 48 deletions(-) diff --git a/astprinter.d b/astprinter.d index ae2e855..603cc13 100644 --- a/astprinter.d +++ b/astprinter.d @@ -411,6 +411,7 @@ class XMLPrinter : ASTVisitor override void visit(EnumDeclaration enumDec) { output.writeln(""); + writeDdoc(enumDec.comment); if (enumDec.name.type == tok!"identifier") output.writeln("", enumDec.name.text, ""); enumDec.accept(this); @@ -420,6 +421,7 @@ class XMLPrinter : ASTVisitor override void visit(EnumMember enumMem) { output.writeln(""); + writeDdoc(enumMem.comment); enumMem.accept(this); output.writeln(""); } @@ -553,6 +555,7 @@ class XMLPrinter : ASTVisitor { output.writeln(""); output.writeln("", functionDec.name.text, ""); + writeDdoc(functionDec.comment); if (functionDec.hasAuto) output.writeln(""); if (functionDec.hasRef) @@ -726,6 +729,7 @@ class XMLPrinter : ASTVisitor { output.writeln(""); output.writeln("", interfaceDec.name.text, ""); + writeDdoc(interfaceDec.comment); interfaceDec.accept(this); output.writeln(""); } @@ -733,6 +737,7 @@ class XMLPrinter : ASTVisitor override void visit(Invariant invariant_) { output.writeln(""); + writeDdoc(invariant_.comment); invariant_.accept(this); output.writeln(""); } @@ -1193,7 +1198,7 @@ class XMLPrinter : ASTVisitor output.writeln(""); return; } - + writeDdoc(templateDeclaration.comment); output.writeln(""); output.writeln("", templateDeclaration.name.text, ""); @@ -1432,7 +1437,10 @@ class XMLPrinter : ASTVisitor override void visit(VariableDeclaration variableDeclaration) { - mixin (tagAndAccept!"variableDeclaration"); + output.writeln(""); + writeDdoc(variableDeclaration.comment); + variableDeclaration.accept(this); + output.writeln(""); } override void visit(Vector vector) @@ -1477,10 +1485,16 @@ class XMLPrinter : ASTVisitor alias ASTVisitor.visit visit; - private string xmlEscape(string s) + private static string xmlEscape(string s) { return s.translate(['<' : "<", '>' : ">", '&' : "&"]); } + private void writeDdoc(string comment) + { + if (comment is null) return; + output.writeln("", xmlEscape(comment), ""); + } + File output; } diff --git a/main.d b/main.d index d24e91b..a999515 100644 --- a/main.d +++ b/main.d @@ -96,7 +96,11 @@ int main(string[] args) { bool usingStdin = args.length == 1; ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]); - auto tokens = byToken!(ubyte[], false, false)(bytes); + LexerConfig config; + config.whitespaceBehavior = WhitespaceBehavior.include; + config.stringBehavior = StringBehavior.source; + config.commentBehavior = CommentBehavior.include; + auto tokens = byToken(bytes, config); highlighter.highlight(tokens, args.length == 1 ? "stdin" : args[1]); return 0; } @@ -104,11 +108,16 @@ int main(string[] args) { bool usingStdin = args.length == 1; ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]); - auto tokens = byToken!(ubyte[], false, false)(bytes); + LexerConfig config; + config.whitespaceBehavior = WhitespaceBehavior.skip; + config.stringBehavior = StringBehavior.source; + config.commentBehavior = CommentBehavior.attach; + auto tokens = byToken(bytes, config); foreach (ref token; tokens) { writeln("«", token.text is null ? str(token.type) : token.text, - " ", token.index, " ", token.line, " ", token.column, "»"); + " ", token.index, " ", token.line, " ", token.column, " ", + token.comment, "»"); } } else if (ctags) @@ -126,7 +135,11 @@ int main(string[] args) { if (usingStdin) { - auto tokens = byToken!(ubyte[], false, false)(readStdin()); + LexerConfig config; + config.whitespaceBehavior = WhitespaceBehavior.include; + config.stringBehavior = StringBehavior.source; + config.commentBehavior = CommentBehavior.include; + auto tokens = byToken(readStdin(), config); if (tokenCount) printTokenCount(stdout, "stdin", tokens); else diff --git a/stdx/d/ast.d b/stdx/d/ast.d index adf4ccc..dd9f3e3 100644 --- a/stdx/d/ast.d +++ b/stdx/d/ast.d @@ -290,6 +290,7 @@ public: /** */ Type type; /** */ Token name; /** */ AliasInitializer[] initializers; + /** */ string comment; } /// @@ -808,6 +809,7 @@ public: /** */ Constraint constraint; /** */ BaseClassList baseClassList; /** */ StructBody structBody; + /** */ string comment; } /// @@ -891,6 +893,7 @@ public: /** */ MemberFunctionAttribute[] memberFunctionAttributes; /** */ TemplateParameters templateParameters; /** */ size_t location; + /** */ string comment; } /// @@ -1067,6 +1070,7 @@ public: } /** */ FunctionBody functionBody; /** */ size_t location; + /** */ string comment; } /// @@ -1113,6 +1117,7 @@ public: /** */ Token name; /** */ Type type; /** */ EnumBody enumBody; + /** */ string comment; } /// @@ -1126,6 +1131,7 @@ public: /** */ Token name; /** */ Type type; /** */ AssignExpression assignExpression; + /** */ string comment; } /// @@ -1325,6 +1331,7 @@ public: /** */ Constraint constraint; /** */ FunctionBody functionBody; /** */ MemberFunctionAttribute[] memberFunctionAttributes; + /** */ string comment; } /// @@ -1549,6 +1556,7 @@ public: /** */ Constraint constraint; /** */ BaseClassList baseClassList; /** */ StructBody structBody; + /** */ string comment; } /// @@ -1560,6 +1568,7 @@ public: mixin (visitIfNotNull!(blockStatement)); } /** */ BlockStatement blockStatement; + /** */ string comment; } /// @@ -2067,6 +2076,7 @@ public: } /** */ FunctionBody functionBody; /** */ size_t location; + /** */ string comment; } /// @@ -2079,6 +2089,7 @@ public: } /** */ FunctionBody functionBody; /** */ size_t location; + /** */ string comment; } /// @@ -2236,6 +2247,7 @@ public: /** */ TemplateParameters templateParameters; /** */ Constraint constraint; /** */ StructBody structBody; + /** */ string comment; } /// @@ -2376,6 +2388,7 @@ public: /** */ Constraint constraint; /** */ Declaration[] declarations; /** */ EponymousTemplateDeclaration eponymousTemplateDeclaration; + /** */ string comment; } /// @@ -2694,6 +2707,7 @@ public: /** */ TemplateParameters templateParameters; /** */ Constraint constraint; /** */ StructBody structBody; + /** */ string comment; } /// @@ -2705,6 +2719,7 @@ public: mixin (visitIfNotNull!(blockStatement)); } /** */ BlockStatement blockStatement; + /** */ string comment; } /// @@ -2719,6 +2734,7 @@ public: /** */ Declarator[] declarators; /** */ StorageClass storageClass; /** */ AutoDeclaration autoDeclaration; + /** */ string comment; } /// diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index 08c6e0c..23ad718 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -50,31 +50,69 @@ private enum dynamicTokens = [ ]; public alias TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens) IdType; -public alias TokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens) str; +public alias tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens) str; public template tok(string token) { alias TokenId!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens, token) tok; } -public alias stdx.lexer.TokenStructure!(IdType) Token; +enum extraFields = q{ + string comment; +}; +public alias stdx.lexer.TokenStructure!(IdType, extraFields) Token; -pure nothrow bool isNotComment(const Token t) { return t.type != tok!"comment"; } -pure nothrow bool isNotWhitespace(const Token t) { return t.type != tok!"whitespace"; } -pure nothrow bool isNotEither(const Token t) { return t.type != tok!"whitespace" && t.type != tok!"comment"; } - -public auto byToken(R, bool skipComments = true, bool skipWhitespace = true)(R range) +/** + * Configure string lexing behavior + */ +public enum StringBehavior : ubyte { - auto tokens = DLexer!(R)(range); - static if (skipComments) - { - static if (skipWhitespace) - return filter!isNotEither(tokens); - else - return filter!isNotComment(tokens); - } - else static if (skipWhitespace) - return filter!isNotWhitespace(tokens); - else - return tokens; + /// Do not include quote characters, process escape sequences + compiler = 0b0000_0000, + /// Opening quotes, closing quotes, and string suffixes are included in the + /// string token + includeQuoteChars = 0b0000_0001, + /// String escape sequences are not replaced + notEscaped = 0b0000_0010, + /// Not modified at all. Useful for formatters or highlighters + source = includeQuoteChars | notEscaped +} + +/** + * Configure whitespace handling behavior + */ +public enum WhitespaceBehavior : ubyte +{ + /// Whitespace is skipped + skip, + /// Whitespace is treated as a token + include +} +/** + * Configure comment handling behavior + */ +public enum CommentBehavior : ubyte +{ + /// Comments are attached to the non-whitespace token that follows them + attach, + /// Comments are tokens, and can be returned by calls to the token range's front() + include +} + +public struct LexerConfig +{ + StringBehavior stringBehavior; + WhitespaceBehavior whitespaceBehavior; + CommentBehavior commentBehavior; +} + +public auto byToken(R)(R range) +{ + LexerConfig config; + return byToken(range, config); +} + +public auto byToken(R)(R range, const LexerConfig config) +{ + return DLexer!(R)(range, config); } unittest @@ -371,12 +409,49 @@ public struct DLexer(R) private alias typeof(range).Mark Mark; - this(R range) + this(R range, const LexerConfig config) { this.range = LexerRange!(typeof(buffer(range)))(buffer(range)); + this.config = config; popFront(); } + private static bool isDocComment(string comment) pure nothrow @safe + { + return comment.length >= 3 && (comment[0 .. 3] == "///" + || comment[0 .. 3] == "/**" || comment[0 .. 3] == "/++"); + } + + public void popFront() + { + _popFront(); + string comment = null; + switch (_front.type) + { + case tok!"comment": + if (config.commentBehavior == CommentBehavior.attach) + { + import std.string; + if (isDocComment(front.text)) + comment = comment == null ? front.text : format("%s\n%s", comment, front.text); + do _popFront(); while (front == tok!"comment"); + if (front == tok!"whitespace") goto case tok!"whitespace"; + } + break; + case tok!"whitespace": + if (config.whitespaceBehavior == WhitespaceBehavior.skip) + { + do _popFront(); while (front == tok!"whitespace"); + if (front == tok!"comment") goto case tok!"comment"; + } + break; + default: + break; + } + _front.comment = comment; + } + + bool isWhitespace() pure /*const*/ nothrow { switch (range.front) @@ -597,7 +672,7 @@ public struct DLexer(R) mixin (tokenStart); return lexDecimal(mark, line, column, index); } - + Token lexDecimal(Mark mark, size_t line, size_t column, size_t index) pure nothrow { bool foundDot = range.front == '.'; @@ -728,6 +803,7 @@ public struct DLexer(R) } if (!range.empty && range.front == 'i') { + warning("Complex number literals are deprecated"); range.popFront(); if (type == tok!"floatLiteral") type = tok!"ifloatLiteral"; @@ -783,7 +859,7 @@ public struct DLexer(R) return Token(tok!"scriptLine", cast(string) range.slice(mark), line, column, index); } - + Token lexSpecialTokenSequence() pure { mixin (tokenStart); @@ -1329,12 +1405,14 @@ public struct DLexer(R) size_t line = range.line; auto mark = range.mark(); }; - + void error(...) pure { } - + void warning(...) pure { - + } + + const LexerConfig config; } diff --git a/stdx/d/parser.d b/stdx/d/parser.d index fbdc62d..c8c78be 100644 --- a/stdx/d/parser.d +++ b/stdx/d/parser.d @@ -127,7 +127,7 @@ alias core.sys.posix.stdio.fileno fileno; node.type = parseType(); return node; } - + unittest { auto sourceCode = q{a = abcde!def}; @@ -156,7 +156,7 @@ alias core.sys.posix.stdio.fileno fileno; if (expect(tok!";") is null) return null; return node; } - + unittest { auto sourceCode = q{alias oneTwoThree this;}; @@ -188,7 +188,7 @@ alias core.sys.posix.stdio.fileno fileno; } return node; } - + unittest { auto sourceCode = q{align(42) align}; @@ -1170,6 +1170,8 @@ incorrect; auto ident = expect(tok!"identifier"); if (ident is null) return null; node.name = *ident; + node.comment = comment; + comment = null; if (currentIs(tok!"(")) { node.templateParameters = parseTemplateParameters(); @@ -1409,6 +1411,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; { mixin(traceEnterAndExit!(__FUNCTION__)); Constructor node = new Constructor; + node.comment = comment; + comment = null; auto t = expect(tok!"this"); if (t is null) return null; node.location = t.index; @@ -1557,7 +1561,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new Declaration; - + comment = current.comment; do { if (!isAttribute()) @@ -1904,6 +1908,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new Destructor; + node.comment = comment; + comment = null; if (expect(tok!"~") is null) return null; if (expect(tok!"this") is null) return null; if (expect(tok!"(") is null) return null; @@ -2005,6 +2011,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; node.name = advance(); else node.name.line = tokens[index - 1].line; // preserve line number if anonymous + node.comment = comment; + comment = null; if (currentIs(tok!":")) { advance(); @@ -2026,6 +2034,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c; { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new EnumMember; + node.comment = current.comment; if (currentIs(tok!"identifier")) { if (peekIsOneOf(tok!",", tok!"}")) @@ -2425,6 +2434,8 @@ body {} // six { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new FunctionDeclaration; + node.comment = comment; + comment = null; if (isAuto) goto functionName; @@ -2994,6 +3005,8 @@ import core.stdc.stdio, std.string : KeepTerminator; auto ident = expect(tok!"identifier"); if (ident is null) return null; node.name = *ident; + node.comment = comment; + comment = null; if (currentIs(tok!"(")) { node.templateParameters = parseTemplateParameters(); @@ -3129,7 +3142,7 @@ invariant() foo(); if (expect(tok!")") is null) return null; return node; } - + unittest { auto sourceCode = q{is ( x : uybte)}c; @@ -3410,7 +3423,7 @@ invariant() foo(); node.symbol = parseSymbol(); return node; } - + unittest { } @@ -4585,6 +4598,8 @@ q{(int a, ...) { node.name = advance(); } + node.comment = comment; + comment = null; if (currentIs(tok!"(")) { @@ -4856,6 +4871,8 @@ q{(int a, ...) { mixin(traceEnterAndExit!(__FUNCTION__)); auto node = new TemplateDeclaration; + node.comment = comment; + comment = null; if (currentIs(tok!"enum")) { node.eponymousTemplateDeclaration = parseEponymousTemplateDeclaration(); @@ -5784,7 +5801,7 @@ q{doStuff(5)}c; Unittest parseUnittest() { mixin(traceEnterAndExit!(__FUNCTION__)); - mixin (simpleParse!(Unittest, tok!"unittest", "blockStatement|parseBlockStatement")); + mixin (simpleParse!(Unittest, tok!"unittest", "blockStatement|parseBlockStatement", true)); } /** @@ -6490,9 +6507,16 @@ protected: template simpleParse(NodeType, parts ...) { - enum simpleParse = "auto node = new " ~ NodeType.stringof ~ ";\n" - ~ simpleParseItems!(parts) - ~ "\nreturn node;\n"; + static if (__traits(hasMember, NodeType, "comment")) + enum simpleParse = "auto node = new " ~ NodeType.stringof ~ ";\n" + ~ "node.comment = comment;\n" + ~ "comment = null;\n" + ~ simpleParseItems!(parts) + ~ "\nreturn node;\n"; + else + enum simpleParse = "auto node = new " ~ NodeType.stringof ~ ";\n" + ~ simpleParseItems!(parts) + ~ "\nreturn node;\n"; } template simpleParseItems(items ...) @@ -6609,4 +6633,5 @@ protected: int suppressMessages; size_t index; int _traceDepth; + string comment; } diff --git a/stdx/lexer.d b/stdx/lexer.d index 980e530..537e6a1 100644 --- a/stdx/lexer.d +++ b/stdx/lexer.d @@ -33,7 +33,7 @@ template TokenIdType(alias staticTokens, alias dynamicTokens, static assert (false); } -string TokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property +string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type) @property { if (type == 0) return "!ERROR!"; @@ -90,7 +90,7 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens, } } -struct TokenStructure(IDType) +struct TokenStructure(IDType, string extraFields = "") { bool opEquals(IDType type) const pure nothrow @safe { @@ -116,6 +116,7 @@ struct TokenStructure(IDType) size_t column; size_t index; IDType type; + mixin (extraFields); } mixin template Lexer(R, IDType, Token, alias defaultTokenFunction, @@ -210,7 +211,7 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction, return _front; } - void popFront() pure + void _popFront() pure { _front = advance(); } @@ -277,20 +278,20 @@ struct LexerRange(BufferType) if (isBuffer!BufferType) column = 1; line = 1; } - + void popFront() pure { index++; column++; range.popFront(); } - + void incrementLine() pure nothrow { column = 1; line++; } - + BufferType range; alias range this; size_t index;