/******************************************************************************* * Boost Software License - Version 1.0 - August 17th, 2003 * * Permission is hereby granted, free of charge, to any person or organization * obtaining a copy of the software and accompanying documentation covered by * this license (the "Software") to use, reproduce, display, distribute, * execute, and transmit the Software, and to prepare derivative works of the * Software, and to permit third-parties to whom the Software is furnished to * do so, all subject to the following: * * The copyright notices in the Software and this entire statement, including * the above license grant, this restriction and the following disclaimer, * must be included in all copies of the Software, in whole or in part, and * all derivative works of the Software, unless such copies or derivative * works are solely in the form of machine-executable object code generated by * a source language processor. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. ******************************************************************************/ module dfmt; import std.stdio; import std.d.lexer; import std.d.parser; import std.d.formatter; import std.d.ast; import std.array; immutable USAGE = "usage: %s [--inplace] [...] Formats D code. --inplace change file in-place instead of outputing to stdout (implicit in case of multiple files) -h, --help display this help and exit "; version (NoMain) { } else int main(string[] args) { import std.getopt; bool inplace = false; bool show_usage = false; getopt(args, "help|h", &show_usage, "inplace", &inplace); if (show_usage) { import std.path: baseName; writef(USAGE, baseName(args[0])); return 0; } File output = stdout; ubyte[] buffer; args.popFront(); if (args.length == 0) { ubyte[4096] inputBuffer; ubyte[] b; while (true) { b = stdin.rawRead(inputBuffer); if (b.length) buffer ~= b; else break; } format("stdin", buffer, output); } else { import std.file; if (args.length >= 2) inplace = true; while (args.length > 0) { const path = args.front; args.popFront(); if (isDir(path)) { inplace = true; foreach (string name; dirEntries(path, "*.d", SpanMode.depth)) { args ~= name; } continue; } File f = File(path); buffer = new ubyte[](cast(size_t)f.size); f.rawRead(buffer); if (inplace) output = File(path, "w"); format(path, buffer, output); } } return 0; } void format(Output)(string source_desc, ubyte[] buffer, Output output) { LexerConfig config; config.stringBehavior = StringBehavior.source; config.whitespaceBehavior = WhitespaceBehavior.skip; LexerConfig parseConfig; parseConfig.stringBehavior = StringBehavior.source; parseConfig.whitespaceBehavior = WhitespaceBehavior.skip; StringCache cache = StringCache(StringCache.defaultBucketCount); ASTInformation astInformation; FormatterConfig formatterConfig; auto parseTokens = getTokensForParser(buffer, parseConfig, &cache); auto mod = parseModule(parseTokens, source_desc); auto visitor = new FormatVisitor(&astInformation); visitor.visit(mod); astInformation.cleanup(); auto tokens = byToken(buffer, config, &cache).array(); auto tokenFormatter = TokenFormatter!Output(tokens, output, &astInformation, &formatterConfig); tokenFormatter.format(); } struct TokenFormatter(Output) { this(const(Token)[] tokens, Output output, ASTInformation* astInformation, FormatterConfig* config) { this.tokens = tokens; this.output = output; this.astInformation = astInformation; this.config = config; } void format() { while (index < tokens.length) formatStep(); } invariant { assert (indentLevel >= 0); } private: void formatStep() { import std.range:assumeSorted; assert (index < tokens.length); if (current.type == tok!"comment") { const i = index; if (i > 0) { if (tokens[i-1].line < tokens[i].line) { if (tokens[i-1].type != tok!"comment" && tokens[i-1].type != tok!"{") newline(); } else write(" "); } writeToken(); if (i >= tokens.length-1) newline(); else if (tokens[i+1].line > tokens[i].line) newline(); else if (tokens[i+1].type != tok!"{") write(" "); } else if (isStringLiteral(current.type) || isNumberLiteral(current.type) || current.type == tok!"characterLiteral") { writeToken(); } else if (current.type == tok!"module" || current.type == tok!"import") { auto t = current.type; writeToken(); write(" "); while (index < tokens.length) { if (current.type == tok!";") { writeToken(); tempIndent = 0; if (!(t == tok!"import" && current.type == tok!"import")) write("\n"); newline(); break; } else if (current.type == tok!",") { // compute length until next , or ; int length_of_next_chunk = INVALID_TOKEN_LENGTH; for (size_t i=index+1; i= 0); length_of_next_chunk += len; } assert (length_of_next_chunk > 0); writeToken(); if (currentLineLength+1+length_of_next_chunk >= config.columnSoftLimit) { pushIndent(); newline(); } else write(" "); } else formatStep(); } } else if (current.type == tok!"return") { writeToken(); write(" "); } else if (current.type == tok!"switch") formatSwitch(); else if (current.type == tok!"for" || current.type == tok!"foreach" || current.type == tok!"foreach_reverse" || current.type == tok!"while" || current.type == tok!"if") { currentLineLength += currentTokenLength() + 1; writeToken(); write(" "); writeParens(false); if (current.type != tok!"{" && current.type != tok!";") { pushIndent(); newline(); } } else if (isKeyword(current.type)) { switch (current.type) { case tok!"default": case tok!"cast": writeToken(); break; case tok!"mixin": writeToken(); write(" "); break; default: if (index + 1 < tokens.length) { auto next = tokens[index + 1]; if (next.type == tok!";" || next.type == tok!"(" || next.type == tok!")" || next.type == tok!"," || next.type == tok!"{" || next.type == tok!".") { writeToken(); } else { writeToken(); write(" "); } } else writeToken(); break; } } else if (isBasicType(current.type)) { writeToken(); if (current.type == tok!"identifier" || isKeyword(current.type)) write(" "); } else if (isOperator(current.type)) { switch (current.type) { case tok!"*": if (!assumeSorted(astInformation.spaceAfterLocations) .equalRange(current.index).empty) { writeToken(); write(" "); break; } goto case; case tok!"~": case tok!"&": case tok!"+": case tok!"-": if (!assumeSorted(astInformation.unaryLocations) .equalRange(current.index).empty) { writeToken(); break; } goto binary; case tok!"(": writeParens(true); break; case tok!":": if (!assumeSorted(astInformation.ternaryColonLocations) .equalRange(current.index).empty) { write(" "); writeToken(); write(" "); } else writeToken(); break; case tok!"@": case tok!"!": case tok!"...": case tok!"[": case tok!"++": case tok!"--": case tok!"$": writeToken(); break; case tok!"]": writeToken(); if (current.type == tok!"identifier") write(" "); break; case tok!";": tempIndent = 0; writeToken(); if (current.type != tok!"comment") newline(); break; case tok!"{": writeBraces(); break; case tok!".": if (currentLineLength + nextTokenLength() >= config.columnSoftLimit) { pushIndent(); newline(); writeToken(); } else writeToken(); break; case tok!",": if (currentLineLength + nextTokenLength() >= config.columnSoftLimit) { pushIndent(); writeToken(); newline(); } else { writeToken(); write(" "); } break; case tok!"^^": case tok!"^=": case tok!"^": case tok!"~=": case tok!"<<=": case tok!"<<": case tok!"<=": case tok!"<>=": case tok!"<>": case tok!"<": case tok!"==": case tok!"=>": case tok!"=": case tok!">=": case tok!">>=": case tok!">>>=": case tok!">>>": case tok!">>": case tok!">": case tok!"|=": case tok!"||": case tok!"|": case tok!"-=": case tok!"!<=": case tok!"!<>=": case tok!"!<>": case tok!"!<": case tok!"!=": case tok!"!>=": case tok!"!>": case tok!"?": case tok!"/=": case tok!"/": case tok!"..": case tok!"*=": case tok!"&=": case tok!"&&": case tok!"%=": case tok!"%": case tok!"+=": binary: if (currentLineLength + nextTokenLength() >= config.columnSoftLimit) { pushIndent(); newline(); } else write(" "); writeToken(); write(" "); break; default: assert (false, str(current.type)); } } else if (current.type == tok!"identifier") { writeToken(); if (current.type == tok!"identifier" || isKeyword(current.type) || current.type == tok!"@") write(" "); } else assert (false, str(current.type)); } /// Pushes a temporary indent level void pushIndent() { if (tempIndent == 0) tempIndent++; } /// Pops a temporary indent level void popIndent() { if (tempIndent > 0) tempIndent--; } /// Writes balanced braces void writeBraces() { import std.range : assumeSorted; int depth = 0; do { if (current.type == tok!"{") { depth++; if (config.braceStyle == BraceStyle.otbs) { write(" "); write("{"); } else { newline(); write("{"); } indentLevel++; index++; newline(); } else if (current.type == tok!"}") { // Silly hack to format enums better. if (peekBackIs(tok!"identifier")) newline(); write("}"); depth--; if (index < tokens.length-1 && assumeSorted(astInformation.doubleNewlineLocations) .equalRange(tokens[index].index).length) { output.write("\n"); } if (config.braceStyle == BraceStyle.otbs) { index++; if (index < tokens.length && current.type == tok!"else") write(" "); else { if (peekIs(tok!"case") || peekIs(tok!"default")) indentLevel--; newline(); } } else { index++; if (peekIs(tok!"case") || peekIs(tok!"default")) indentLevel--; newline(); } } else formatStep(); } while (index < tokens.length && depth > 0); popIndent(); } void writeParens(bool space_afterwards) in { assert (current.type == tok!"(", str(current.type)); } body { immutable t = tempIndent; int depth = 0; do { if (current.type == tok!";") { write("; "); currentLineLength += 2; index++; continue; } else if (current.type == tok!"(") { writeToken(); depth++; continue; } else if (current.type == tok!")") { if (peekIs(tok!"identifier") || (index + 1 < tokens.length && isKeyword(tokens[index + 1].type))) { writeToken(); if (space_afterwards) write(" "); } else writeToken(); depth--; } else formatStep(); } while (index < tokens.length && depth > 0); popIndent(); tempIndent = t; } bool peekIsLabel() { return peekIs(tok!"identifier") && peek2Is(tok!":"); } void formatSwitch() { immutable l = indentLevel; writeToken(); // switch write(" "); writeParens(true); if (current.type != tok!"{") return; if (config.braceStyle == BraceStyle.otbs) write(" "); else newline(); writeToken(); newline(); while (index < tokens.length) { if (current.type == tok!"case") { writeToken(); write(" "); } else if (current.type == tok!":") { if (peekIs(tok!"..")) { writeToken(); write(" "); writeToken(); write(" "); } else { if (!(peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel())) indentLevel++; formatStep(); newline(); } } else { assert (current.type != tok!"}"); if (peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel()) { indentLevel = l; formatStep(); } else { formatStep(); if (current.type == tok!"}") break; } } } indentLevel = l; assert (current.type == tok!"}"); writeToken(); newline(); } int tokenLength(size_t i) pure @safe @nogc { import std.algorithm : countUntil; assert (i+1 <= tokens.length); switch (tokens[i].type) { case tok!"identifier": case tok!"stringLiteral": case tok!"wstringLiteral": case tok!"dstringLiteral": auto c = cast(int) tokens[i].text.countUntil('\n'); if (c == -1) return cast(int) tokens[i].text.length; mixin (generateFixedLengthCases()); default: return INVALID_TOKEN_LENGTH; } } int currentTokenLength() pure @safe @nogc { return tokenLength(index); } int nextTokenLength() pure @safe @nogc { if (index + 1 >= tokens.length) return INVALID_TOKEN_LENGTH; return tokenLength(index + 1); } ref current() const @property in { assert (index < tokens.length); } body { return tokens[index]; } bool peekBackIs(IdType tokenType) { return (index >= 1) && tokens[index - 1].type == tokenType; } bool peekImplementation(IdType tokenType, size_t n) { auto i = index + n; while (i < tokens.length && tokens[i].type == tok!"comment") i++; return i < tokens.length && tokens[i].type == tokenType; } bool peek2Is(IdType tokenType) { return peekImplementation(tokenType, 2); } bool peekIs(IdType tokenType) { return peekImplementation(tokenType, 1); } void newline() { output.write("\n"); currentLineLength = 0; if (index < tokens.length) { if (current.type == tok!"}") indentLevel--; indent(); } } void write(string str) { currentLineLength += str.length; output.write(str); } void writeToken() { currentLineLength += currentTokenLength(); if (current.text is null) output.write(str(current.type)); else output.write(current.text); index++; } void indent() { import std.range : repeat, take; if (config.useTabs) foreach (i; 0 .. indentLevel + tempIndent) { currentLineLength += config.tabSize; output.write("\t"); } else foreach (i; 0 .. indentLevel + tempIndent) foreach (j; 0 .. config.indentSize) { output.write(" "); currentLineLength++; } } /// Length of an invalid token enum int INVALID_TOKEN_LENGTH = -1; /// Current index into the tokens array size_t index; /// Current indent level int indentLevel; /// Current temproray indententation level; int tempIndent; /// Length of the current line (so far) uint currentLineLength = 0; /// Output to write output to Output output; /// Tokens being formatted const(Token)[] tokens; /// Information about the AST ASTInformation* astInformation; /// Configuration FormatterConfig* config; } /// The only good brace styles enum BraceStyle { allman, otbs } /// Configuration options for formatting struct FormatterConfig { /// Number of spaces used for indentation uint indentSize = 4; /// Use tabs or spaces bool useTabs = false; /// Size of a tab character uint tabSize = 8; /// Soft line wrap limit uint columnSoftLimit = 80; /// Hard line wrap limit uint columnHardLimit = 120; /// Use the One True Brace Style BraceStyle braceStyle = BraceStyle.allman; } /// struct ASTInformation { /// Sorts the arrays so that binary search will work on them void cleanup() { import std.algorithm : sort; sort(doubleNewlineLocations); sort(spaceAfterLocations); sort(unaryLocations); } /// Locations of end braces for struct bodies size_t[] doubleNewlineLocations; /// Locations of tokens where a space is needed (such as the '*' in a type) size_t[] spaceAfterLocations; /// Locations of unary operators size_t[] unaryLocations; /// Locations of ':' operators in ternary expressions size_t[] ternaryColonLocations; } /// Collects information from the AST that is useful for the formatter final class FormatVisitor : ASTVisitor { /// this(ASTInformation* astInformation) { this.astInformation = astInformation; } override void visit(const FunctionBody functionBody) { if (functionBody.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.blockStatement.endLocation; if (functionBody.inStatement !is null && functionBody.inStatement.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.inStatement.blockStatement.endLocation; if (functionBody.outStatement !is null && functionBody.outStatement.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.outStatement.blockStatement.endLocation; if (functionBody.bodyStatement !is null && functionBody.bodyStatement.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.bodyStatement.blockStatement.endLocation; functionBody.accept(this); } override void visit(const EnumBody enumBody) { astInformation.doubleNewlineLocations ~= enumBody.endLocation; enumBody.accept(this); } override void visit(const Unittest unittest_) { astInformation.doubleNewlineLocations ~= unittest_.blockStatement.endLocation; unittest_.accept(this); } override void visit(const Invariant invariant_) { astInformation.doubleNewlineLocations ~= invariant_.blockStatement.endLocation; invariant_.accept(this); } override void visit(const StructBody structBody) { astInformation.doubleNewlineLocations ~= structBody.endLocation; structBody.accept(this); } override void visit(const TemplateDeclaration templateDeclaration) { astInformation.doubleNewlineLocations ~= templateDeclaration.endLocation; templateDeclaration.accept(this); } override void visit(const TypeSuffix typeSuffix) { if (typeSuffix.star.type != tok!"") astInformation.spaceAfterLocations ~= typeSuffix.star.index; typeSuffix.accept(this); } override void visit(const UnaryExpression unary) { if (unary.prefix.type == tok!"~" || unary.prefix.type == tok!"&" || unary.prefix.type == tok!"*" || unary.prefix.type == tok!"+" || unary.prefix.type == tok!"-") { astInformation.unaryLocations ~= unary.prefix.index; } unary.accept(this); } override void visit(const TernaryExpression ternary) { if (ternary.colon.type != tok!"") astInformation.ternaryColonLocations ~= ternary.colon.index; ternary.accept(this); } private: ASTInformation* astInformation; alias visit = ASTVisitor.visit; } string generateFixedLengthCases() { import std.algorithm:map; import std.string:format; string[] fixedLengthTokens = [ "abstract", "alias", "align", "asm", "assert", "auto", "body", "bool", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override", "package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort", "version", "void", "volatile", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~=" ]; return fixedLengthTokens.map!(a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t"); }