/******************************************************************************* * Boost Software License - Version 1.0 - August 17th, 2003 * * Permission is hereby granted, free of charge, to any person or organization * obtaining a copy of the software and accompanying documentation covered by * this license (the "Software") to use, reproduce, display, distribute, * execute, and transmit the Software, and to prepare derivative works of the * Software, and to permit third-parties to whom the Software is furnished to * do so, all subject to the following: * * The copyright notices in the Software and this entire statement, including * the above license grant, this restriction and the following disclaimer, * must be included in all copies of the Software, in whole or in part, and * all derivative works of the Software, unless such copies or derivative * works are solely in the form of machine-executable object code generated by * a source language processor. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. ******************************************************************************/ module dfmt; import std.stdio; import std.d.lexer; import std.d.parser; import std.d.formatter; import std.d.ast; import std.array; version (NoMain) { } else int main(string[] args) { import std.getopt : getopt; bool inplace = false; bool show_usage = false; FormatterConfig formatterConfig; getopt(args, "help|h", &show_usage, "inplace", &inplace, "tabs|t", &formatterConfig.useTabs, "braces", &formatterConfig.braceStyle); if (show_usage) { import std.path: baseName; writef(USAGE, baseName(args[0])); return 0; } File output = stdout; ubyte[] buffer; args.popFront(); if (args.length == 0) { ubyte[4096] inputBuffer; ubyte[] b; while (true) { b = stdin.rawRead(inputBuffer); if (b.length) buffer ~= b; else break; } format("stdin", buffer, output.lockingTextWriter(), &formatterConfig); } else { import std.file : dirEntries, isDir, SpanMode; if (args.length >= 2) inplace = true; while (args.length > 0) { const path = args.front; args.popFront(); if (isDir(path)) { inplace = true; foreach (string name; dirEntries(path, "*.d", SpanMode.depth)) { args ~= name; } continue; } File f = File(path); buffer = new ubyte[](cast(size_t) f.size); f.rawRead(buffer); if (inplace) output = File(path, "w"); format(path, buffer, output.lockingTextWriter(), &formatterConfig); } } return 0; } private: immutable USAGE = "usage: %s [--inplace] [...] Formats D code. --inplace Change file in-place instead of outputing to stdout (implicit in case of multiple files) --tabs | -t Use tabs instead of spaces for indentation --braces=allman Use Allman indent style (default) --braces=otbs Use the One True Brace Style --help | -h Display this help and exit "; void format(OutputRange)(string source_desc, ubyte[] buffer, OutputRange output, FormatterConfig* formatterConfig) { LexerConfig config; config.stringBehavior = StringBehavior.source; config.whitespaceBehavior = WhitespaceBehavior.skip; LexerConfig parseConfig; parseConfig.stringBehavior = StringBehavior.source; parseConfig.whitespaceBehavior = WhitespaceBehavior.skip; StringCache cache = StringCache(StringCache.defaultBucketCount); ASTInformation astInformation; auto parseTokens = getTokensForParser(buffer, parseConfig, &cache); auto mod = parseModule(parseTokens, source_desc); auto visitor = new FormatVisitor(&astInformation); visitor.visit(mod); astInformation.cleanup(); auto tokens = byToken(buffer, config, &cache).array(); auto tokenFormatter = TokenFormatter!OutputRange(tokens, output, &astInformation, formatterConfig); tokenFormatter.format(); } struct TokenFormatter(OutputRange) { /** * Params: * tokens = the tokens to format * output = the output range that the code will be formatted to * astInformation = information about the AST used to inform formatting * decisions. */ this(const(Token)[] tokens, OutputRange output, ASTInformation* astInformation, FormatterConfig* config) { this.tokens = tokens; this.output = output; this.astInformation = astInformation; this.config = config; } /// Runs the foramtting process void format() { while (index < tokens.length) formatStep(); } private: void formatStep() { import std.algorithm : canFind; assert (index < tokens.length); if (currentIs(tok!"comment")) { if (index > 0) { if (tokens[index - 1].type != tok!";" && tokens[index - 1].type != tok!"}" && tokens[index - 1].line + 1 < tokens[index].line) { newline(); } else if (tokens[index - 1].line == tokens[index].line) write(" "); } writeToken(); auto j = justAddedExtraNewline; if (tokens[index - 1].text[0 .. 2] == "//") { newline(); justAddedExtraNewline = j; } else if (index < tokens.length) { if (tokens[index - 1].line == tokens[index].line) { if (tokens[index].type != tok!"{") write(" "); } else if (!currentIs(tok!"{")) newline(); } else newline(); } else if (isStringLiteral(current.type) || isNumberLiteral(current.type) || currentIs(tok!"characterLiteral")) { writeToken(); } else if (currentIs(tok!"module") || currentIs(tok!"import")) { auto t = current.type; writeToken(); if (currentIs(tok!"(")) { writeParens(false); return; } write(" "); while (index < tokens.length) { if (currentIs(tok!";")) { writeToken(); if (index >= tokens.length) { newline(); break; } if (currentIs(tok!"comment") && current.line == peekBack().line) { justAddedExtraNewline = true; break; } else if ((t == tok!"import" && !currentIs(tok!"import"))) { write("\n"); currentLineLength = 0; justAddedExtraNewline = true; newline(); } else newline(); break; } else if (currentIs(tok!",")) { // compute length until next , or ; int length_of_next_chunk = INVALID_TOKEN_LENGTH; for (size_t i=index+1; i= 0); length_of_next_chunk += len; } assert (length_of_next_chunk > 0); writeToken(); if (currentLineLength + 1 + length_of_next_chunk >= config.columnSoftLimit) { if (indents.tempIndents < 2) indents.push(tok!","); newline(); } else write(" "); } else formatStep(); } } else if (currentIs(tok!"return")) { writeToken(); if (!currentIs(tok!";") && !currentIs(tok!")")) write(" "); } else if (currentIs(tok!"with")) { if (indents.length == 0 || indents.top != tok!"switch") indents.push(tok!"with"); writeToken(); write(" "); if (currentIs(tok!"(")) writeParens(false); if (!currentIs(tok!"switch") && !currentIs(tok!"with") && !currentIs(tok!"{")) { newline(); } else if (!currentIs(tok!"{")) write(" "); } else if (currentIs(tok!"switch")) { if (indents.length == 0 || indents.top != tok!"with") indents.push(tok!"switch"); writeToken(); // switch write(" "); } else if (currentIs(tok!"extern") && peekIs(tok!"(")) { writeToken(); write(" "); } else if ((isBlockHeader() || currentIs(tok!"version") || currentIs(tok!"debug")) && peekIs(tok!"(", false)) { immutable bool a = !currentIs(tok!"version") && !currentIs(tok!"debug") ; immutable bool b = a || astInformation.conditionalWithElseLocations .canFindIndex(current.index); immutable bool shouldPushIndent = b || astInformation.conditionalStatementLocations .canFindIndex(current.index); if (shouldPushIndent) indents.push(current.type); writeToken(); write(" "); writeParens(false); if (currentIs(tok!"switch") || (currentIs(tok!"final") && peekIs(tok!"switch"))) write(" "); else if (currentIs(tok!"comment")) formatStep(); else if (!shouldPushIndent) { if (!currentIs(tok!"{") && !currentIs(tok!";")) write(" "); } else if (!currentIs(tok!"{") && !currentIs(tok!";")) newline(); } else if (currentIs(tok!"else")) { writeToken(); if (currentIs(tok!"if") || (currentIs(tok!"static") && peekIs(tok!"if")) || currentIs(tok!"version")) { if (indents.top() == tok!"if" || indents.top == tok!"version") indents.pop(); write(" "); } else if (!currentIs(tok!"{") && !currentIs(tok!"comment")) { if (indents.top() == tok!"if" || indents.top == tok!"version") indents.pop(); indents.push(tok!"else"); newline(); } } else if (isKeyword(current.type)) { switch (current.type) { case tok!"default": writeToken(); break; case tok!"cast": writeToken(); break; case tok!"in": case tok!"is": writeToken(); if (!currentIs(tok!"(") && !currentIs(tok!"{")) write(" "); break; case tok!"case": writeToken(); write(" "); break; default: if (index + 1 < tokens.length) { if (!peekIs(tok!"@") && peekIsOperator()) writeToken(); else { writeToken(); write(" "); } } else writeToken(); break; } } else if (isBasicType(current.type)) { writeToken(); if (currentIs(tok!"identifier") || isKeyword(current.type)) write(" "); } else if (isOperator(current.type)) { switch (current.type) { case tok!"*": if (astInformation.spaceAfterLocations.canFindIndex(current.index)) { writeToken(); if (!currentIs(tok!"*") && !currentIs(tok!")") && !currentIs(tok!"[") && !currentIs(tok!",") && !currentIs(tok!";")) { write(" "); } break; } else if (!astInformation.unaryLocations.canFindIndex(current.index)) goto binary; else writeToken(); break; case tok!"~": if (peekIs(tok!"this")) { if (!(index == 0 || peekBackIs(tok!"{") || peekBackIs(tok!"}") || peekBackIs(tok!";"))) { write(" "); } writeToken(); break; } else goto case; case tok!"&": case tok!"+": case tok!"-": if (astInformation.unaryLocations.canFindIndex(current.index)) { writeToken(); break; } goto binary; case tok!"(": spaceAfterParens = true; writeToken(); parenDepth++; if (linebreakHints.canFindIndex(index - 1) || (linebreakHints.length == 0 && currentLineLength > config.columnSoftLimit && !currentIs(tok!")"))) { indents.push(tok!"("); newline(); } regenLineBreakHintsIfNecessary(index - 1); break; case tok!")": parenDepth--; if (parenDepth == 0) while (indents.length > 0 && isWrapIndent(indents.top)) indents.pop(); if (parenDepth == 0 && (peekIs(tok!"in") || peekIs(tok!"out") || peekIs(tok!"body"))) { writeToken(); // ) newline(); writeToken(); // in/out/body } else if (peekIsLiteralOrIdent() || peekIsBasicType() || peekIsKeyword()) { writeToken(); if (spaceAfterParens || parenDepth > 0) write(" "); } else if ((peekIsKeyword() || peekIs(tok!"@")) && spaceAfterParens) { writeToken(); write(" "); } else writeToken(); break; case tok!"!": if (peekIs(tok!"is")) write(" "); goto case; case tok!"@": case tok!"...": case tok!"[": case tok!"++": case tok!"--": case tok!"$": writeToken(); break; case tok!":": if (astInformation.caseEndLocations.canFindIndex(current.index) || astInformation.attributeDeclarationLines.canFindIndex( current.line)) { writeToken(); if (!currentIs(tok!"{")) newline(); } else if (peekBackIs(tok!"identifier") && (peekBack2Is(tok!"{", true) || peekBack2Is(tok!"}", true) || peekBack2Is(tok!";", true) || peekBack2Is(tok!":", true)) && !(isBlockHeader(1) && !peekIs(tok!"if"))) { writeToken(); if (!currentIs(tok!"{")) newline(); } else { if (peekIs(tok!"..")) writeToken(); else if (isBlockHeader(1) && !peekIs(tok!"if")) { writeToken(); write(" "); } else { write(" : "); index++; } } break; case tok!"]": writeToken(); if (currentIs(tok!"identifier")) write(" "); break; case tok!";": if (parenDepth > 0) { if (!(peekIs(tok!";") || peekIs(tok!")") || peekIs(tok!"}"))) write("; "); else write(";"); index++; } else { writeToken(); linebreakHints = []; newline(); } break; case tok!"{": if (astInformation.structInitStartLocations.canFindIndex( tokens[index].index)) { writeToken(); } else if (astInformation.funLitStartLocations.canFindIndex( tokens[index].index)) { if (peekBackIs(tok!")")) write(" "); writeToken(); write(" "); } else { if (!justAddedExtraNewline && !peekBackIs(tok!"{") && !peekBackIs(tok!"}") && !peekBackIs(tok!";") && !peekBackIs(tok!";")) { if (config.braceStyle == BraceStyle.otbs) { if (!astInformation.structInitStartLocations.canFindIndex(tokens[index].index) && !astInformation.funLitStartLocations.canFindIndex(tokens[index].index)) { while (indents.length && isWrapIndent(indents.top)) indents.pop(); indents.push(tok!"{"); if (index == 1 || peekBackIs(tok!":", true) || peekBackIs(tok!"{", true) || peekBackIs(tok!"}", true) || peekBackIs(tok!")", true) || peekBackIs(tok!";", true)) { indentLevel = indents.indentSize - 1; } } write(" "); } else if (index > 0 && (!peekBackIs(tok!"comment") || tokens[index - 1].text[0 .. 2] != "//")) newline(); } writeToken(); newline(); } break; case tok!"}": if (astInformation.structInitEndLocations.canFindIndex( tokens[index].index)) { writeToken(); } else if (astInformation.funLitEndLocations.canFindIndex( tokens[index].index)) { write(" "); writeToken(); } else { // Silly hack to format enums better. if (peekBackIsLiteralOrIdent() || peekBackIs(tok!",")) newline(); write("}"); if (index < tokens.length - 1 && astInformation.doubleNewlineLocations.canFindIndex( tokens[index].index) && !peekIs(tok!"}")) { write("\n"); currentLineLength = 0; justAddedExtraNewline = true; } if (config.braceStyle == BraceStyle.otbs && currentIs(tok!"else")) write(" "); if (!peekIs(tok!",") && !peekIs(tok!")")) { index++; newline(); } else index++; } break; case tok!".": if (linebreakHints.canFind(index) || (linebreakHints.length == 0 && currentLineLength + nextTokenLength() > config.columnHardLimit)) { if (indents.tempIndents < 2) indents.push(tok!"."); newline(); } writeToken(); break; case tok!",": if (!peekIs(tok!"}") && (linebreakHints.canFind(index) || (linebreakHints.length == 0 && currentLineLength > config.columnSoftLimit))) { writeToken(); if (indents.tempIndents < 2) indents.push(tok!","); newline(); } else { writeToken(); if (!currentIs(tok!")", false) && !currentIs(tok!"]", false) && !currentIs(tok!"}", false) && !currentIs(tok!"comment", false)) { write(" "); } } regenLineBreakHintsIfNecessary(index - 1); break; case tok!"=": case tok!">=": case tok!">>=": case tok!">>>=": case tok!"|=": case tok!"-=": case tok!"/=": case tok!"*=": case tok!"&=": case tok!"%=": case tok!"+=": write(" "); writeToken(); write(" "); regenLineBreakHintsIfNecessary(index - 1); break; case tok!"&&": case tok!"||": regenLineBreakHintsIfNecessary(index); goto case; case tok!"^^": case tok!"^=": case tok!"^": case tok!"~=": case tok!"<<=": case tok!"<<": case tok!"<=": case tok!"<>=": case tok!"<>": case tok!"<": case tok!"==": case tok!"=>": case tok!">>>": case tok!">>": case tok!">": case tok!"|": case tok!"!<=": case tok!"!<>=": case tok!"!<>": case tok!"!<": case tok!"!=": case tok!"!>=": case tok!"!>": case tok!"?": case tok!"/": case tok!"..": case tok!"%": binary: if (linebreakHints.canFind(index)) { if (indents.tempIndents < 2) indents.push(current.type); newline(); } else write(" "); writeToken(); write(" "); break; default: writeToken(); break; } } else if (currentIs(tok!"identifier")) { writeToken(); if (index < tokens.length && (currentIs(tok!"identifier") || isKeyword(current.type) || isBasicType(current.type) || currentIs(tok!"@"))) { write(" "); } } else if (currentIs(tok!"scriptLine")) { writeToken(); newline(); } else writeToken(); } void regenLineBreakHintsIfNecessary(immutable size_t i) { if (linebreakHints.length == 0 || linebreakHints[$ - 1] <= i - 1) { immutable size_t j = expressionEndIndex(i); linebreakHints = chooseLineBreakTokens(i, tokens[i .. j], config, currentLineLength, indentLevel); } } size_t expressionEndIndex(size_t i) const pure @safe @nogc { int parenDepth = 0; int bracketDepth = 0; int braceDepth = 0; loop : while (i < tokens.length) switch (tokens[i].type) { case tok!"(": parenDepth++; i++; break; case tok!"{": braceDepth++; i++; break; case tok!"[": bracketDepth++; i++; break; case tok!")": parenDepth--; if (parenDepth <= 0) break loop; i++; break; case tok!"}": braceDepth--; if (braceDepth <= 0) break loop; i++; break; case tok!"]": bracketDepth--; if (bracketDepth <= 0) break loop; i++; break; case tok!";": break loop; default: i++; break; } return i; } void writeParens(bool spaceAfter) in { assert(currentIs(tok!"("), str(current.type)); } body { immutable int depth = parenDepth; do { formatStep(); spaceAfterParens = spaceAfter; } while (index < tokens.length && parenDepth > depth); } bool peekIsKeyword() { return index + 1 < tokens.length && isKeyword(tokens[index + 1].type); } bool peekIsBasicType() { return index + 1 < tokens.length && isBasicType(tokens[index + 1].type); } bool peekIsLabel() { return peekIs(tok!"identifier") && peek2Is(tok!":"); } int currentTokenLength() pure @safe @nogc { return tokenLength(tokens[index]); } int nextTokenLength() pure @safe @nogc { immutable size_t i = index + 1; if (i >= tokens.length) return INVALID_TOKEN_LENGTH; return tokenLength(tokens[i]); } ref current() const @property in { assert (index < tokens.length); } body { return tokens[index]; } const(Token) peekBack() { assert (index > 0); return tokens[index - 1]; } bool peekBackIsLiteralOrIdent() { if (index == 0) return false; switch (tokens[index - 1].type) { case tok!"doubleLiteral": case tok!"floatLiteral": case tok!"idoubleLiteral": case tok!"ifloatLiteral": case tok!"intLiteral": case tok!"longLiteral": case tok!"realLiteral": case tok!"irealLiteral": case tok!"uintLiteral": case tok!"ulongLiteral": case tok!"characterLiteral": case tok!"identifier": case tok!"stringLiteral": case tok!"wstringLiteral": case tok!"dstringLiteral": return true; default: return false; } } bool peekIsLiteralOrIdent() { if (index + 1 >= tokens.length) return false; switch (tokens[index + 1].type) { case tok!"doubleLiteral": case tok!"floatLiteral": case tok!"idoubleLiteral": case tok!"ifloatLiteral": case tok!"intLiteral": case tok!"longLiteral": case tok!"realLiteral": case tok!"irealLiteral": case tok!"uintLiteral": case tok!"ulongLiteral": case tok!"characterLiteral": case tok!"identifier": case tok!"stringLiteral": case tok!"wstringLiteral": case tok!"dstringLiteral": return true; default: return false; } } bool peekBackIs(IdType tokenType, bool ignoreComments = false) { return peekImplementation(tokenType, -1, ignoreComments); } bool peekBack2Is(IdType tokenType, bool ignoreComments = false) { return peekImplementation(tokenType, -2, ignoreComments); } bool peekImplementation(IdType tokenType, int n, bool ignoreComments = true) { auto i = index + n; if (ignoreComments) while (n != 0 && i < tokens.length && tokens[i].type == tok!"comment") i = n > 0 ? i + 1 : i - 1; return i < tokens.length && tokens[i].type == tokenType; } bool peek2Is(IdType tokenType, bool ignoreComments = true) { return peekImplementation(tokenType, 2, ignoreComments); } bool peekIsOperator() { return index + 1 < tokens.length && isOperator(tokens[index + 1].type); } bool peekIs(IdType tokenType, bool ignoreComments = true) { return peekImplementation(tokenType, 1, ignoreComments); } bool currentIs(IdType tokenType, bool ignoreComments = false) { return peekImplementation(tokenType, 0, ignoreComments); } /// Bugs: not unicode correct size_t tokenEndLine(const Token t) { import std.algorithm : count; switch (t.type) { case tok!"comment": case tok!"stringLiteral": case tok!"wstringLiteral": case tok!"dstringLiteral": return t.line + (cast(ubyte[]) t.text).count('\n'); default: return t.line; } } bool isBlockHeader(int i = 0) { if (i + index < 0 || i + index >= tokens.length) return false; auto t = tokens[i + index].type; return t == tok!"for" || t == tok!"foreach" || t == tok!"foreach_reverse" || t == tok!"while" || t == tok!"if" || t == tok!"out" || t == tok!"catch" || t == tok!"with"; } void newline() { import std.range : assumeSorted; import std.algorithm : max; if (currentIs(tok!"comment") && current.line == tokenEndLine(tokens[index - 1])) return; immutable bool hasCurrent = index + 1 < tokens.length; if (hasCurrent && tokens[index].type == tok!"}" && !assumeSorted( astInformation.funLitEndLocations).equalRange(tokens[index].index).empty) { write(" "); return; } output.put("\n"); if (!justAddedExtraNewline && index > 0 && hasCurrent && tokens[index].line - tokenEndLine(tokens[index - 1]) > 1) { output.put("\n"); } justAddedExtraNewline = false; currentLineLength = 0; if (hasCurrent) { bool switchLabel = false; if (currentIs(tok!"else")) { auto i = indents.indentToMostRecent(tok!"if"); auto v = indents.indentToMostRecent(tok!"version"); auto mostRecent = max(i, v); if (mostRecent != -1) indentLevel = mostRecent; } else if (currentIs(tok!"identifier") && peekIs(tok!":")) { while ((peekBackIs(tok!"}", true) || peekBackIs(tok!";", true)) && indents.length && isTempIndent(indents.top())) { indents.pop(); } auto l = indents.indentToMostRecent(tok!"switch"); if (l != -1) { indentLevel = l; switchLabel = true; } else if (!isBlockHeader(2) || peek2Is(tok!"if")) { auto l2 = indents.indentToMostRecent(tok!"{"); indentLevel = l2 == -1 ? indentLevel : l2; } else indentLevel = indents.indentSize; } else if (currentIs(tok!"case") || currentIs(tok!"default")) { while ((peekBackIs(tok!"}", true) || peekBackIs(tok!";", true)) && indents.length && isTempIndent(indents.top())) { indents.pop(); } auto l = indents.indentToMostRecent(tok!"switch"); if (l != -1) indentLevel = l; } else if (currentIs(tok!"{") && !astInformation.structInitStartLocations.canFindIndex(tokens[index].index) && !astInformation.funLitStartLocations.canFindIndex(tokens[index].index)) { while (indents.length && isWrapIndent(indents.top)) indents.pop(); indents.push(tok!"{"); if (index == 1 || peekBackIs(tok!":", true) || peekBackIs(tok!"{", true) || peekBackIs(tok!"}", true) || peekBackIs(tok!")", true) || peekBackIs(tok!";", true)) { indentLevel = indents.indentSize - 1; } } else if (currentIs(tok!"}")) { while (indents.length && isTempIndent(indents.top())) indents.pop(); if (indents.top == tok!"{") { indentLevel = indents.indentToMostRecent(tok!"{"); indents.pop(); } while (indents.length && isTempIndent(indents.top) && ((indents.top != tok!"if" && indents.top != tok!"version") || !peekIs(tok!"else"))) { indents.pop(); } } else if (astInformation.attributeDeclarationLines.canFindIndex(current.line)) { auto l = indents.indentToMostRecent(tok!"{"); if (l != -1) indentLevel = l; } else { while ((peekBackIs(tok!"}", true) || peekBackIs(tok!";", true)) && indents.length && isTempIndent(indents.top())) { indents.pop(); } indentLevel = indents.indentSize; } indent(); } } void write(string str) { currentLineLength += str.length; output.put(str); } void writeToken() { currentLineLength += currentTokenLength(); if (current.text is null) output.put(str(current.type)); else output.put(current.text); index++; } void indent() { if (config.useTabs) foreach (i; 0 .. indentLevel) { currentLineLength += config.tabSize; output.put("\t"); } else foreach (i; 0 .. indentLevel) foreach (j; 0 .. config.indentSize) { output.put(" "); currentLineLength++; } } int indentLevel; /// Current index into the tokens array size_t index; /// Length of the current line (so far) uint currentLineLength = 0; /// Output to write output to OutputRange output; /// Tokens being formatted const(Token)[] tokens; /// Information about the AST ASTInformation* astInformation; size_t[] linebreakHints; IndentStack indents; /// Configuration FormatterConfig* config; /// Keep track of whether or not an extra newline was just added because of /// an import statement. bool justAddedExtraNewline; int parenDepth; bool spaceAfterParens; } bool isWrapIndent(IdType type) pure nothrow @nogc @safe { return type != tok!"{" && isOperator(type); } bool isTempIndent(IdType type) pure nothrow @nogc @safe { return type != tok!"{"; } /// The only good brace styles enum BraceStyle { allman, otbs } /// Configuration options for formatting struct FormatterConfig { /// Number of spaces used for indentation uint indentSize = 4; /// Use tabs or spaces bool useTabs = false; /// Size of a tab character uint tabSize = 4; /// Soft line wrap limit uint columnSoftLimit = 80; /// Hard line wrap limit uint columnHardLimit = 120; /// Use the One True Brace Style BraceStyle braceStyle = BraceStyle.allman; } bool canFindIndex(const size_t[] items, size_t index) { import std.range : assumeSorted; return !assumeSorted(items).equalRange(index).empty; } /// struct ASTInformation { /// Sorts the arrays so that binary search will work on them void cleanup() { import std.algorithm : sort; sort(doubleNewlineLocations); sort(spaceAfterLocations); sort(unaryLocations); sort(attributeDeclarationLines); sort(caseEndLocations); sort(structInitStartLocations); sort(structInitEndLocations); sort(funLitStartLocations); sort(funLitEndLocations); sort(conditionalWithElseLocations); } /// Locations of end braces for struct bodies size_t[] doubleNewlineLocations; /// Locations of tokens where a space is needed (such as the '*' in a type) size_t[] spaceAfterLocations; /// Locations of unary operators size_t[] unaryLocations; /// Lines containing attribute declarations size_t[] attributeDeclarationLines; /// Case statement colon locations size_t[] caseEndLocations; /// Opening braces of struct initializers size_t[] structInitStartLocations; /// Closing braces of struct initializers size_t[] structInitEndLocations; /// Opening braces of function literals size_t[] funLitStartLocations; /// Closing braces of function literals size_t[] funLitEndLocations; size_t[] conditionalWithElseLocations; size_t[] conditionalStatementLocations; } /// Collects information from the AST that is useful for the formatter final class FormatVisitor : ASTVisitor { /// this(ASTInformation* astInformation) { this.astInformation = astInformation; } override void visit(const ConditionalDeclaration dec) { if (dec.falseDeclaration !is null) { auto condition = dec.compileCondition; if (condition.versionCondition !is null) { astInformation.conditionalWithElseLocations ~= condition.versionCondition.versionIndex; } else if (condition.debugCondition !is null) { astInformation.conditionalWithElseLocations ~= condition.debugCondition.debugIndex; } // Skip "static if" because the formatting for normal "if" handles // it properly } dec.accept(this); } override void visit(const ConditionalStatement statement) { auto condition = statement.compileCondition; if (condition.versionCondition !is null) { astInformation.conditionalStatementLocations ~= condition.versionCondition.versionIndex; } else if (condition.debugCondition !is null) { astInformation.conditionalStatementLocations ~= condition.debugCondition.debugIndex; } statement.accept(this); } override void visit(const FunctionLiteralExpression funcLit) { astInformation.funLitStartLocations ~= funcLit.functionBody .blockStatement.startLocation; astInformation.funLitEndLocations ~= funcLit.functionBody .blockStatement.endLocation; funcLit.accept(this); } override void visit(const DefaultStatement defaultStatement) { astInformation.caseEndLocations ~= defaultStatement.colonLocation; defaultStatement.accept(this); } override void visit(const CaseStatement caseStatement) { astInformation.caseEndLocations ~= caseStatement.colonLocation; caseStatement.accept(this); } override void visit(const CaseRangeStatement caseRangeStatement) { astInformation.caseEndLocations ~= caseRangeStatement.colonLocation; caseRangeStatement.accept(this); } override void visit(const FunctionBody functionBody) { if (functionBody.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.blockStatement.endLocation; if (functionBody.bodyStatement !is null && functionBody.bodyStatement.blockStatement !is null) astInformation.doubleNewlineLocations ~= functionBody.bodyStatement.blockStatement.endLocation; functionBody.accept(this); } override void visit(const StructInitializer structInitializer) { astInformation.structInitStartLocations ~= structInitializer.startLocation; astInformation.structInitEndLocations ~= structInitializer.endLocation; structInitializer.accept(this); } override void visit(const EnumBody enumBody) { astInformation.doubleNewlineLocations ~= enumBody.endLocation; enumBody.accept(this); } override void visit(const Unittest unittest_) { astInformation.doubleNewlineLocations ~= unittest_.blockStatement.endLocation; unittest_.accept(this); } override void visit(const Invariant invariant_) { astInformation.doubleNewlineLocations ~= invariant_.blockStatement.endLocation; invariant_.accept(this); } override void visit(const StructBody structBody) { astInformation.doubleNewlineLocations ~= structBody.endLocation; structBody.accept(this); } override void visit(const TemplateDeclaration templateDeclaration) { astInformation.doubleNewlineLocations ~= templateDeclaration.endLocation; templateDeclaration.accept(this); } override void visit(const TypeSuffix typeSuffix) { if (typeSuffix.star.type != tok!"") astInformation.spaceAfterLocations ~= typeSuffix.star.index; typeSuffix.accept(this); } override void visit(const UnaryExpression unary) { if (unary.prefix.type == tok!"~" || unary.prefix.type == tok!"&" || unary.prefix.type == tok!"*" || unary.prefix.type == tok!"+" || unary.prefix.type == tok!"-") { astInformation.unaryLocations ~= unary.prefix.index; } unary.accept(this); } override void visit(const AttributeDeclaration attributeDeclaration) { astInformation.attributeDeclarationLines ~= attributeDeclaration.line; attributeDeclaration.accept(this); } private: ASTInformation* astInformation; alias visit = ASTVisitor.visit; } /// Length of an invalid token enum int INVALID_TOKEN_LENGTH = -1; string generateFixedLengthCases() { import std.algorithm : map; import std.string : format; string[] fixedLengthTokens = ["abstract", "alias", "align", "asm", "assert", "auto", "body", "bool", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", "double", "else", "enum", "export", "extern", "false", "final", "finally", "float", "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow", "null", "out", "override", "package", "pragma", "private", "protected", "public", "pure", "real", "ref", "return", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort", "version", "void", "volatile", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="]; return fixedLengthTokens.map!(a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t"); } int tokenLength(ref const Token t) pure @safe @nogc { import std.algorithm : countUntil; switch (t.type) { case tok!"doubleLiteral": case tok!"floatLiteral": case tok!"idoubleLiteral": case tok!"ifloatLiteral": case tok!"intLiteral": case tok!"longLiteral": case tok!"realLiteral": case tok!"irealLiteral": case tok!"uintLiteral": case tok!"ulongLiteral": case tok!"characterLiteral": return cast(int) t.text.length; case tok!"identifier": case tok!"stringLiteral": case tok!"wstringLiteral": case tok!"dstringLiteral": // TODO: Unicode line breaks and old-Mac line endings auto c = cast(int) t.text.countUntil('\n'); if (c == -1) return cast(int) t.text.length; else return c; mixin (generateFixedLengthCases()); default: return INVALID_TOKEN_LENGTH; } } bool isBreakToken(IdType t) { switch (t) { case tok!"||": case tok!"&&": case tok!"(": case tok!"[": case tok!",": case tok!"^^": case tok!"^=": case tok!"^": case tok!"~=": case tok!"<<=": case tok!"<<": case tok!"<=": case tok!"<>=": case tok!"<>": case tok!"<": case tok!"==": case tok!"=>": case tok!"=": case tok!">=": case tok!">>=": case tok!">>>=": case tok!">>>": case tok!">>": case tok!">": case tok!"|=": case tok!"|": case tok!"-=": case tok!"!<=": case tok!"!<>=": case tok!"!<>": case tok!"!<": case tok!"!=": case tok!"!>=": case tok!"!>": case tok!"?": case tok!"/=": case tok!"/": case tok!"..": case tok!"*=": case tok!"&=": case tok!"%=": case tok!"%": case tok!"+=": case tok!".": case tok!"~": case tok!"+": case tok!"-": return true; default: return false; } } int breakCost(IdType t) { switch (t) { case tok!"||": case tok!"&&": return 0; case tok!"[": case tok!"(": case tok!",": return 10; case tok!"^^": case tok!"^=": case tok!"^": case tok!"~=": case tok!"<<=": case tok!"<<": case tok!"<=": case tok!"<>=": case tok!"<>": case tok!"<": case tok!"==": case tok!"=>": case tok!"=": case tok!">=": case tok!">>=": case tok!">>>=": case tok!">>>": case tok!">>": case tok!">": case tok!"|=": case tok!"|": case tok!"-=": case tok!"!<=": case tok!"!<>=": case tok!"!<>": case tok!"!<": case tok!"!=": case tok!"!>=": case tok!"!>": case tok!"?": case tok!"/=": case tok!"/": case tok!"..": case tok!"*=": case tok!"&=": case tok!"%=": case tok!"%": case tok!"+": case tok!"-": case tok!"~": case tok!"+=": return 100; case tok!".": return 200; default: return 1000; } } struct State { this(size_t[] breaks, const Token[] tokens, int depth, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { this.breaks = breaks; this._depth = depth; import std.algorithm : map, sum; this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + (depth * 500); int ll = currentLineLength; size_t breakIndex = 0; size_t i = 0; bool s = true; if (breaks.length == 0) { _cost = int.max; immutable int l = currentLineLength + tokens.map!(a => tokenLength(a)).sum(); s = l < formatterConfig.columnSoftLimit; } else { do { immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); if (ll > formatterConfig.columnSoftLimit) { s = false; break; } i = j; ll = (indentLevel + 1) * formatterConfig.indentSize; breakIndex++; } while (i + 1 < tokens.length); } this._solved = s; } int cost() const pure nothrow @safe @property { return _cost; } int depth() const pure nothrow @safe @property { return _depth; } int solved() const pure nothrow @safe @property { return _solved; } int opCmp(ref const State other) const pure nothrow @safe { if (cost < other.cost || (cost == other.cost && ((breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) || (_solved && !other.solved)))) { return -1; } return other.cost > _cost; } bool opEquals(ref const State other) const pure nothrow @safe { return other.breaks == breaks; } size_t toHash() const nothrow @safe { return typeid(breaks).getHash(&breaks); } size_t[] breaks; private: int _cost; int _depth; bool _solved; } size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { import std.container.rbtree : RedBlackTree; import std.algorithm : filter, min; import core.memory : GC; enum ALGORITHMIC_COMPLEXITY_SUCKS = 25; immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS); int depth = 0; auto open = new RedBlackTree!State; open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth, formatterConfig, currentLineLength, indentLevel)); State lowest; GC.disable(); scope(exit) GC.enable(); while (!open.empty) { State current = open.front(); if (current.cost < lowest.cost) lowest = current; open.removeFront(); if (current.solved) { current.breaks[] += index; return current.breaks; } foreach (next; validMoves(tokens[0 .. tokensEnd], current, formatterConfig, currentLineLength, indentLevel, depth)) { open.insert(next); } } if (open.empty) { lowest.breaks[] += index; return lowest.breaks; } foreach (r; open[].filter!(a => a.solved)) { r.breaks[] += index; return r.breaks; } assert (false); } State[] validMoves(const Token[] tokens, ref const State current, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth) { import std.algorithm : sort, canFind; import std.array : insertInPlace; State[] states; foreach (i, token; tokens) { if (current.breaks.canFind(i) || !isBreakToken(token.type)) continue; size_t[] breaks; breaks ~= current.breaks; breaks ~= i; sort(breaks); states ~= State(breaks, tokens, depth + 1, formatterConfig, currentLineLength, indentLevel); } return states; } struct IndentStack { int indentToMostRecent(IdType item) { size_t i = index; while (true) { if (arr[i] == item) return indentSize(i); if (i > 0) i--; else return -1; } } int tempIndents() const pure nothrow @property { if (index == 0) return 0; int tempIndentCount = 0; for(size_t i = index; i > 0; i--) { if (!isTempIndent(arr[i])) break; tempIndentCount++; } return tempIndentCount; } void push(IdType item) pure nothrow { index = index == 255 ? index : index + 1; arr[index] = item; } void pop() pure nothrow { index = index == 0 ? index : index - 1; } IdType top() const pure nothrow @property { return arr[index]; } int indentSize(size_t k = size_t.max) const pure nothrow { if (index == 0) return 0; immutable size_t j = k == size_t.max ? index : k - 1; int size = 0; foreach (i; 1 .. j + 1) { if ((i + 1 <= index && !isWrapIndent(arr[i]) && isTempIndent(arr[i]) && (!isTempIndent(arr[i + 1]) || arr[i + 1] == tok!"switch"))) { continue; } size++; } return size; } int length() const pure nothrow @property { return cast(int) index; } private: size_t index; IdType[256] arr; } unittest { import std.string : format; auto sourceCode = q{const Token[] tokens, ref const State current, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth}; LexerConfig config; config.stringBehavior = StringBehavior.source; config.whitespaceBehavior = WhitespaceBehavior.skip; StringCache cache = StringCache(StringCache.defaultBucketCount); auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array(); FormatterConfig formatterConfig; auto result = chooseLineBreakTokens(0, tokens, &formatterConfig, 0, 0); assert ([15] == result, "%s".format(result)); }