dfmt/src/dfmt.d

1717 lines
50 KiB
D

/*******************************************************************************
* Boost Software License - Version 1.0 - August 17th, 2003
*
* Permission is hereby granted, free of charge, to any person or organization
* obtaining a copy of the software and accompanying documentation covered by
* this license (the "Software") to use, reproduce, display, distribute,
* execute, and transmit the Software, and to prepare derivative works of the
* Software, and to permit third-parties to whom the Software is furnished to
* do so, all subject to the following:
*
* The copyright notices in the Software and this entire statement, including
* the above license grant, this restriction and the following disclaimer,
* must be included in all copies of the Software, in whole or in part, and
* all derivative works of the Software, unless such copies or derivative
* works are solely in the form of machine-executable object code generated by
* a source language processor.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
******************************************************************************/
module dfmt;
import std.stdio;
import std.d.lexer;
import std.d.parser;
import std.d.formatter;
import std.d.ast;
import std.array;
/**
 * Program entry point (compiled out when version NoMain is set).
 *
 * With no path arguments, reads D source from stdin and writes the formatted
 * result to stdout. Otherwise each path is formatted in turn; directories
 * are expanded to the "*.d" files found below them. Output goes to stdout
 * unless in-place mode is active, which happens when --inplace is given,
 * when two or more paths are given, or as soon as a directory is seen.
 *
 * Params:
 *     args = command line arguments, including the program name
 *
 * Returns: 0
 */
version (NoMain)
{ }
else
int main(string[] args)
{
    import std.getopt : getopt;

    bool inplace = false;
    bool show_usage = false;
    FormatterConfig formatterConfig;
    getopt(args,
        "help|h", &show_usage,
        "inplace", &inplace,
        "tabs|t", &formatterConfig.useTabs,
        "braces", &formatterConfig.braceStyle);
    if (show_usage)
    {
        import std.path: baseName;
        writef(USAGE, baseName(args[0]));
        return 0;
    }
    File output = stdout;
    ubyte[] buffer;
    // Drop the program name; what remains are the paths to format.
    args.popFront();
    if (args.length == 0)
    {
        // No paths: slurp stdin in fixed-size chunks until it is exhausted.
        ubyte[4096] inputBuffer;
        ubyte[] b;
        while (true)
        {
            b = stdin.rawRead(inputBuffer);
            if (b.length)
                buffer ~= b;
            else
                break;
        }
        format("stdin", buffer, output.lockingTextWriter(), &formatterConfig);
    }
    else
    {
        import std.file : dirEntries, isDir, SpanMode;

        if (args.length >= 2)
            inplace = true;
        while (args.length > 0)
        {
            const path = args.front;
            args.popFront();
            if (isDir(path))
            {
                // Directories are replaced by the D files they contain, which
                // are appended to the work list and always formatted in place.
                inplace = true;
                foreach (string name; dirEntries(path, "*.d", SpanMode.depth))
                {
                    args ~= name;
                }
                continue;
            }
            File f = File(path);
            buffer = new ubyte[](cast(size_t) f.size);
            // File.rawRead rejects an empty buffer, so an empty file must not
            // be read at all. Keep the slice rawRead returns so a short read
            // does not leave stale bytes at the end of the buffer.
            if (buffer.length > 0)
                buffer = f.rawRead(buffer);
            // Release the read handle before the same path is reopened (and
            // truncated) for writing.
            f.close();
            if (inplace)
                output = File(path, "w");
            format(path, buffer, output.lockingTextWriter(), &formatterConfig);
        }
    }
    return 0;
}
private:
/// Help text printed for --help. It is used as a writef format string; the
/// single `%s` receives the base name of the executable.
immutable USAGE = "usage: %s [--inplace] [<path>...]
Formats D code.
--inplace Change file in-place instead of outputting to stdout
(implicit in case of multiple files)
--tabs | -t Use tabs instead of spaces for indentation
--braces=allman Use Allman indent style (default)
--braces=otbs Use the One True Brace Style
--help | -h Display this help and exit
";
/**
 * Formats one D source buffer and writes the result to `output`.
 *
 * The buffer is processed twice. First it is parsed into a module so that a
 * FormatVisitor can record AST-derived facts in an ASTInformation. Then it is
 * lexed again into a flat token array, which a TokenFormatter prints using
 * that information.
 *
 * Params:
 *     source_desc = name passed to the parser for this input
 *     buffer = bytes of the source code to format
 *     output = output range that receives the formatted code
 *     formatterConfig = formatting options
 */
void format(OutputRange)(string source_desc, ubyte[] buffer, OutputRange output,
    FormatterConfig* formatterConfig)
{
    // Lexer settings for the formatting pass.
    LexerConfig lexerSettings;
    lexerSettings.stringBehavior = StringBehavior.source;
    lexerSettings.whitespaceBehavior = WhitespaceBehavior.skip;

    // Lexer settings for the parsing pass (same values, separate instance).
    LexerConfig parserSettings;
    parserSettings.stringBehavior = StringBehavior.source;
    parserSettings.whitespaceBehavior = WhitespaceBehavior.skip;

    StringCache stringCache = StringCache(StringCache.defaultBucketCount);
    ASTInformation info;

    // Pass 1: parse and collect AST information.
    auto tokensForParser = getTokensForParser(buffer, parserSettings, &stringCache);
    auto parsedModule = parseModule(tokensForParser, source_desc);
    auto collector = new FormatVisitor(&info);
    collector.visit(parsedModule);
    info.cleanup();

    // Pass 2: lex again and print.
    auto allTokens = byToken(buffer, lexerSettings, &stringCache).array();
    auto printer = TokenFormatter!OutputRange(allTokens, output, &info,
        formatterConfig);
    printer.format();
}
struct TokenFormatter(OutputRange)
{
/**
 * Creates a formatter for the given token stream.
 *
 * Params:
 *     tokens = the tokens to format
 *     output = the output range that the formatted code is written to
 *     astInformation = information gathered from the AST that informs
 *         formatting decisions
 *     config = formatting options
 */
this(const(Token)[] tokens, OutputRange output, ASTInformation* astInformation,
    FormatterConfig* config)
{
    this.config = config;
    this.astInformation = astInformation;
    this.output = output;
    this.tokens = tokens;
}
/// Runs the formatting process: performs one formatting step after another
/// until every token has been consumed.
void format()
{
    for (; index < tokens.length;)
        formatStep();
}
/// Struct invariant: the indent level must never become negative.
invariant
{
assert (indentLevel >= 0);
}
private:
/**
 * Formats the token at `index` and advances `index` past everything written.
 *
 * Dispatches on the kind of the current token: comment, literal,
 * module/import, return, switch, version/extern, block header, else, other
 * keywords, basic types, operators and identifiers. Several branches consume
 * more than one token, and some call formatStep recursively.
 */
void formatStep()
{
import std.range : assumeSorted;
import std.algorithm : canFind, startsWith;
assert (index < tokens.length);
// Comments: pick the whitespace before the comment, write it, then pick
// what follows it.
if (currentIs(tok!"comment"))
{
if (index > 0)
{
// The previous token is neither ';' nor '}' and ends more than one line above.
if (tokens[index - 1].type != tok!";"
&& tokens[index - 1].type != tok!"}"
&& tokens[index - 1].line + 1 < tokens[index].line)
{
newline();
}
// Trailing comment on the same line as the previous token.
else if (tokens[index - 1].line == tokens[index].line)
write(" ");
}
writeToken();
// newline() resets justAddedExtraNewline, so remember it to restore below.
auto j = justAddedExtraNewline;
// A `//` comment always ends its line.
if (tokens[index - 1].text[0 .. 2] == "//")
{
newline();
justAddedExtraNewline = j;
}
else if (index < tokens.length)
{
if (tokens[index - 1].line == tokens[index].line)
{
if (tokens[index].type != tok!"{")
write(" ");
}
else if (!currentIs(tok!"{"))
newline();
}
else
newline();
}
// Literals are written as-is, with no whitespace added here.
else if (isStringLiteral(current.type) || isNumberLiteral(current.type)
|| currentIs(tok!"characterLiteral"))
{
writeToken();
}
// `module` / `import`: written up to and including the terminating ';'.
else if (currentIs(tok!"module") || currentIs(tok!"import"))
{
auto t = current.type;
writeToken();
// Keyword directly followed by '(': format the parenthesised part and stop.
if (currentIs(tok!"("))
{
writeParens(false);
return;
}
write(" ");
while (index < tokens.length)
{
if (currentIs(tok!";"))
{
writeToken();
tempIndent = 0;
if (index >= tokens.length)
{
newline();
break;
}
// A comment on the same line as the ';' stays on that line.
if (currentIs(tok!"comment") && current.line == peekBack().line)
{
justAddedExtraNewline = true;
break;
}
// An import that is not followed by another import gets a blank line after it.
else if ((t == tok!"import" && !currentIs(tok!"import")))
{
write("\n");
justAddedExtraNewline = true;
newline();
}
else
newline();
break;
}
else if (currentIs(tok!","))
{
// compute length until next , or ;
// NOTE(review): the sum starts at INVALID_TOKEN_LENGTH (-1), not 0, and
// the assert on `len` fails for tokens whose length is unknown.
int length_of_next_chunk = INVALID_TOKEN_LENGTH;
for (size_t i=index+1; i<tokens.length; i++)
{
if (tokens[i].type == tok!"," || tokens[i].type == tok!";")
break;
const len = tokenLength(tokens[i]);
assert (len >= 0);
length_of_next_chunk += len;
}
assert (length_of_next_chunk > 0);
writeToken();
// Wrap after the comma if the next chunk would reach the soft limit.
if (currentLineLength + 1 + length_of_next_chunk >= config.columnSoftLimit)
{
pushIndent();
newline();
}
else
write(" ");
}
else
formatStep();
}
}
// `return`: followed by a space unless the next token is ';' or ')'.
else if (currentIs(tok!"return"))
{
writeToken();
if (!currentIs(tok!";") && !currentIs(tok!")"))
write(" ");
}
else if (currentIs(tok!"switch"))
formatSwitch();
// `version (...)` / `extern (...)`
else if ((currentIs(tok!"version") || currentIs(tok!"extern"))
&& peekIs(tok!"("))
{
writeToken();
write(" ");
writeParens(true);
}
// Block headers (see isBlockHeader) that are directly followed by '('.
else if (isBlockHeader() && peekIs(tok!"(", false))
{
// Remember the temporary indent of each `if`; the ';' case below restores
// it when an `else` follows.
if (currentIs(tok!"if"))
ifIndents.push(tempIndent);
writeToken();
write(" ");
writeParens(false);
if (currentIs(tok!"switch") || (currentIs(tok!"final") && peekIs(tok!"switch")))
write(" ");
else if (currentIs(tok!"comment"))
{
if (!peekIs(tok!"{") && !peekIs(tok!";"))
pushIndent();
formatStep();
}
// Body without braces: put it on its own, indented line.
else if (!currentIs(tok!"{") && !currentIs(tok!";"))
{
pushIndent();
newline();
}
}
else if (currentIs(tok!"else"))
{
writeToken();
// `else if`, `else static if` and `else version` stay on the same line.
if (currentIs(tok!"if") || (currentIs(tok!"static") && peekIs(tok!"if"))
|| currentIs(tok!"version"))
{
write(" ");
}
else if (!currentIs(tok!"{") && !currentIs(tok!"comment"))
{
pushIndent();
newline();
}
}
// Remaining keywords.
else if (isKeyword(current.type))
{
switch (current.type)
{
case tok!"default":
writeToken();
break;
case tok!"cast":
writeToken();
writeParens(true);
break;
case tok!"in":
case tok!"is":
writeToken();
if (!currentIs(tok!"("))
write(" ");
break;
default:
// Followed by a space unless the next token is an operator other than '@'
// or the keyword is the last token.
if (index + 1 < tokens.length)
{
if (!peekIs(tok!"@") && peekIsOperator())
writeToken();
else
{
writeToken();
write(" ");
}
}
else
writeToken();
break;
}
}
// Basic types: a space separates the type from a following identifier or keyword.
else if (isBasicType(current.type))
{
writeToken();
if (currentIs(tok!"identifier") || isKeyword(current.type))
write(" ");
}
else if (isOperator(current.type))
{
switch (current.type)
{
case tok!"*":
// '*' recorded in spaceAfterLocations (filled from type suffixes by FormatVisitor).
if (!assumeSorted(astInformation.spaceAfterLocations)
.equalRange(current.index).empty)
{
writeToken();
if (!currentIs(tok!"*") && !currentIs(tok!")") && !currentIs(tok!"[")
&& !currentIs(tok!",") && !currentIs(tok!";"))
{
write(" ");
}
break;
}
// Not recorded as a unary operator: format as a binary operator.
else if (assumeSorted(astInformation.unaryLocations).equalRange(current.index).empty)
goto binary;
else
writeToken();
break;
case tok!"~":
// `~this`: separated from the previous token by a space unless it follows
// '{', '}' or ';' or is the first token.
if (peekIs(tok!"this"))
{
if (!(index == 0 || peekBackIs(tok!"{") || peekBackIs(tok!"}")
|| peekBackIs(tok!";")))
{
write(" ");
}
writeToken();
break;
}
else
goto case;
case tok!"&":
case tok!"+":
case tok!"-":
// Unary use is written without surrounding spaces; otherwise binary.
if (!assumeSorted(astInformation.unaryLocations)
.equalRange(current.index).empty)
{
writeToken();
break;
}
goto binary;
case tok!"(":
writeParens(true);
break;
case tok!"!":
// `!is` gets a space in front of the '!'.
if (peekIs(tok!"is"))
write(" ");
goto case;
case tok!"@":
case tok!"...":
case tok!"[":
case tok!"++":
case tok!"--":
case tok!"$":
writeToken();
break;
case tok!":":
// Colon on a line recorded as an attribute declaration.
if (!assumeSorted(astInformation.attributeDeclarationLines)
.equalRange(current.line).empty)
{
writeToken();
tempIndent = 0;
newline();
}
// Colon that ends a case/default statement.
else if (!assumeSorted(astInformation.caseEndLocations)
.equalRange(current.index).empty)
{
if (!(peekIs(tok!"case", false) || peekIs(tok!"default", false)
|| peekIs(tok!"}") || peekIsLabel()))
{
indentLevel++;
}
writeToken();
newline();
}
// Colon after an identifier that itself follows ';', '}' or '{' (a label).
else if (peekBackIs(tok!"identifier") && (peekBack2Is(tok!";", true)
|| peekBack2Is(tok!"}", true) || peekBack2Is(tok!"{", true)))
{
if (tempIndent < 0)
tempIndent = 0;
else
popIndent();
writeToken();
if (isBlockHeader() && !currentIs(tok!"if"))
write(" ");
else if (!currentIs(tok!"{"))
newline();
}
// Any other colon is written with a space on both sides.
else
{
write(" : ");
index++;
}
break;
case tok!"]":
writeToken();
if (currentIs(tok!"identifier"))
write(" ");
break;
case tok!";":
// `;` followed by `else`: restore the indent saved for the matching `if`.
if (peekIs(tok!"else"))
{
tempIndent = ifIndents.top();
if (ifIndents.length)
ifIndents.pop();
}
else if (braceIndents.top() < tempIndent)
{
if (!peekIs(tok!"}"))
tempIndent = 0;
else
popIndent();
}
writeToken();
linebreakHints = [];
newline();
break;
case tok!"{":
writeBraces();
break;
case tok!".":
// Break before '.' when hinted, or when there are no hints and the next
// token would pass the hard limit.
if (linebreakHints.canFind(index) || (linebreakHints.length == 0
&& currentLineLength + nextTokenLength() > config.columnHardLimit))
{
if (tempIndent < 2)
pushIndent();
newline();
}
writeToken();
break;
case tok!",":
// Break after ',' when hinted, or when there are no hints and the line is
// already past the soft limit (never directly before '}').
if (!peekIs(tok!"}") && (linebreakHints.canFind(index)
|| (linebreakHints.length == 0
&& currentLineLength > config.columnSoftLimit)))
{
writeToken();
if (tempIndent < 2)
pushIndent();
newline();
}
else
{
writeToken();
if (currentIs(tok!"}", false))
tempIndent = 0;
else if (!currentIs(tok!")", false) && !currentIs(tok!"]", false)
&& !currentIs(tok!"comment", false))
{
write(" ");
}
}
regenLineBreakHintsIfNecessary(index - 1);
break;
// These operators are always written with a space on both sides.
case tok!"=":
case tok!">=":
case tok!">>=":
case tok!">>>=":
case tok!"|=":
case tok!"-=":
case tok!"/=":
case tok!"*=":
case tok!"&=":
case tok!"%=":
case tok!"+=":
write(" ");
writeToken();
write(" ");
regenLineBreakHintsIfNecessary(index - 1);
break;
case tok!"&&":
case tok!"||":
regenLineBreakHintsIfNecessary(index);
goto case;
case tok!"^^":
case tok!"^=":
case tok!"^":
case tok!"~=":
case tok!"<<=":
case tok!"<<":
case tok!"<=":
case tok!"<>=":
case tok!"<>":
case tok!"<":
case tok!"==":
case tok!"=>":
case tok!">>>":
case tok!">>":
case tok!">":
case tok!"|":
case tok!"!<=":
case tok!"!<>=":
case tok!"!<>":
case tok!"!<":
case tok!"!=":
case tok!"!>=":
case tok!"!>":
case tok!"?":
case tok!"/":
case tok!"..":
case tok!"%":
// Binary operators: a hinted line break (or else a space) before the
// operator, and a space after it.
binary:
if (linebreakHints.canFind(index))
{
if (tempIndent < 2)
pushIndent();
newline();
}
else
write(" ");
writeToken();
write(" ");
break;
default:
writeToken();
break;
}
}
// Identifiers: a space separates them from a following identifier, keyword,
// basic type or '@'.
else if (currentIs(tok!"identifier"))
{
writeToken();
if (index < tokens.length && (currentIs(tok!"identifier")
|| isKeyword(current.type) || isBasicType(current.type)
|| currentIs(tok!"@")))
{
write(" ");
}
}
else
writeToken();
}
/**
 * Recomputes `linebreakHints` for the expression starting at token `i`,
 * unless hints already exist whose last entry lies at or beyond `i`.
 *
 * Params:
 *     i = index of the token at which the expression to analyse starts
 */
void regenLineBreakHintsIfNecessary(immutable size_t i)
{
    immutable bool hintsStillAhead = linebreakHints.length != 0
        && linebreakHints[$ - 1] > i - 1;
    if (hintsStillAhead)
        return;
    immutable size_t end = expressionEndIndex(i);
    linebreakHints = chooseLineBreakTokens(i, tokens[i .. end], config,
        currentLineLength, indentLevel);
}
/// Adds one temporary indent level.
void pushIndent()
{
    tempIndent += 1;
}
/// Removes one temporary indent level; does nothing when there is none left.
void popIndent()
{
    if (tempIndent <= 0)
        return;
    tempIndent -= 1;
}
/**
 * Finds where the expression that starts at token `i` ends.
 *
 * Scans forward, tracking the nesting of parentheses, brackets and braces.
 * The scan stops at the first ';', or at a closing ')', ']' or '}' that
 * brings its own nesting counter to zero or below.
 *
 * Params:
 *     i = index of the first token of the expression
 *
 * Returns: the index of the token that ended the scan, or `tokens.length`
 *     if no such token was found
 */
size_t expressionEndIndex(size_t i) const pure @safe @nogc
{
    int openParens = 0;
    int openBrackets = 0;
    int openBraces = 0;
    for (; i < tokens.length; ++i)
    {
        immutable type = tokens[i].type;
        if (type == tok!";")
            return i;
        else if (type == tok!"(")
            openParens++;
        else if (type == tok!"[")
            openBrackets++;
        else if (type == tok!"{")
            openBraces++;
        else if (type == tok!")")
        {
            if (--openParens <= 0)
                return i;
        }
        else if (type == tok!"]")
        {
            if (--openBrackets <= 0)
                return i;
        }
        else if (type == tok!"}")
        {
            if (--openBraces <= 0)
                return i;
        }
    }
    return i;
}
/**
 * Writes balanced braces.
 *
 * Starting at the current token, formats tokens until the brace depth
 * returns to zero or the tokens run out. Everything between the braces is
 * handled by formatStep. Braces recorded as struct initializer braces are
 * written as plain tokens without changing the indentation.
 */
void writeBraces()
{
import std.range : assumeSorted;
int depth = 0;
do
{
if (currentIs(tok!"{"))
{
// Remember the temporary indent in effect at this opening brace;
// newline() restores it when it reaches the matching '}'.
braceIndents.push(tempIndent);
depth++;
if (assumeSorted(astInformation.structInitStartLocations)
.equalRange(tokens[index].index).length)
{
writeToken();
}
else
{
// Decide what goes in front of the '{': a space for OTBS, otherwise a
// newline unless the previous token is a `//` comment.
if (index > 0 && !justAddedExtraNewline && !peekBackIs(tok!"{")
&& !peekBackIs(tok!"}") && !peekBackIs(tok!";"))
{
if (config.braceStyle == BraceStyle.otbs)
{
write(" ");
}
else if (!peekBackIs(tok!"comment") || tokens[index - 1].text[0 .. 2] != "//")
newline();
}
write("{");
indentLevel++;
index++;
newline();
}
}
else if (currentIs(tok!"}"))
{
braceIndents.pop();
depth--;
if (assumeSorted(astInformation.structInitEndLocations)
.equalRange(tokens[index].index).length)
{
writeToken();
}
else
{
// Silly hack to format enums better.
if (peekBackIsLiteralOrIdent() || peekBackIs(tok!","))
newline();
write("}");
// Closing braces recorded in doubleNewlineLocations are followed by a
// blank line, unless another '}' comes next.
if (index < tokens.length - 1 &&
assumeSorted(astInformation.doubleNewlineLocations)
.equalRange(tokens[index].index).length && !peekIs(tok!"}"))
{
output.put("\n");
justAddedExtraNewline = true;
}
// NOTE(review): `index` still points at the '}' here, so this
// currentIs(tok!"else") test looks like it can never be true - confirm.
if (config.braceStyle == BraceStyle.otbs)
{
if (index < tokens.length && currentIs(tok!"else"))
write(" ");
}
index++;
if (peekIs(tok!"case") || peekIs(tok!"default"))
indentLevel--;
newline();
}
}
else
formatStep();
}
while (index < tokens.length && depth > 0);
}
/**
 * Writes a balanced, parenthesised token sequence starting at the current
 * '(' token. Tokens inside the parentheses other than ';', '(' and ')' are
 * handled by formatStep.
 *
 * The temporary indent is restored to its value on entry and the line break
 * hints are cleared before returning.
 *
 * Params:
 *     space_afterwards = whether a space is written after the final ')'
 *         when the token that follows calls for one
 */
void writeParens(bool space_afterwards)
in
{
assert (currentIs(tok!"("), str(current.type));
}
body
{
import std.range : assumeSorted;
// Saved so the temporary indent can be restored on exit.
immutable t = tempIndent;
int depth = 0;
do
{
// ';' inside parentheses: followed by a space unless the next token is
// ';' or ')'.
if (currentIs(tok!";"))
{
if (!(peekIs(tok!";") || peekIs(tok!")")))
write("; ");
else
write(";");
index++;
continue;
}
else if (currentIs(tok!"("))
{
writeToken();
depth++;
// Break after '(' when hinted, or when there are no hints, the line is
// past the soft limit and the parentheses are not empty.
if (!assumeSorted(linebreakHints).equalRange(index - 1).empty
|| (linebreakHints.length == 0
&& currentLineLength > config.columnSoftLimit
&& !currentIs(tok!")")))
{
if (tempIndent < 2)
pushIndent();
newline();
}
regenLineBreakHintsIfNecessary(index - 1);
continue;
}
else if (currentIs(tok!")"))
{
depth--;
// Final ')' followed by in/out/body: that keyword goes on its own line.
if (depth == 0 && (peekIs(tok!"in") || peekIs(tok!"out")
|| peekIs(tok!"body")))
{
writeToken(); // )
newline();
writeToken(); // in/out/body
}
else if (peekIsLiteralOrIdent() || peekIsBasicType() || peekIsKeyword())
{
writeToken();
if (space_afterwards || depth > 0)
write(" ");
}
// NOTE(review): peekIsKeyword() is already covered by the previous
// branch, so only the '@' case can reach this one.
else if ((peekIsKeyword() || peekIs(tok!"@")) && space_afterwards)
{
writeToken();
write(" ");
}
else
writeToken();
}
else
formatStep();
}
while (index < tokens.length && depth > 0);
// popIndent();
tempIndent = t;
linebreakHints = [];
}
/// Returns: true if there is a next token and it is a keyword
bool peekIsKeyword()
{
    immutable size_t next = index + 1;
    if (next >= tokens.length)
        return false;
    return isKeyword(tokens[next].type);
}
/// Returns: true if there is a next token and it is a basic type
bool peekIsBasicType()
{
    immutable size_t next = index + 1;
    if (next >= tokens.length)
        return false;
    return isBasicType(tokens[next].type);
}
/// Returns: true if the next two tokens are an identifier followed by ':'
bool peekIsLabel()
{
    if (!peekIs(tok!"identifier"))
        return false;
    return peek2Is(tok!":");
}
/**
 * Formats a switch statement starting at the `switch` keyword: the
 * condition, an optional `with (...)` clause, and, if a '{' follows, the
 * whole body up to its closing brace.
 *
 * The indent level in effect on entry is restored before the closing brace
 * of the body is written.
 */
void formatSwitch()
{
// Indent level to return to when the switch body ends.
immutable l = indentLevel;
writeToken(); // switch
write(" ");
writeParens(true);
if (currentIs(tok!"with"))
{
writeToken();
write(" ");
writeParens(true);
}
// No brace-delimited body follows: nothing more to do here.
if (!currentIs(tok!"{"))
return;
if (config.braceStyle == BraceStyle.otbs)
write(" ");
else
newline();
writeToken();
// Statements directly inside the body are indented; case/default labels
// and the closing brace are not.
if (!currentIs(tok!"case") && !currentIs(tok!"default") && !currentIs(tok!"}"))
indentLevel++;
newline();
while (index < tokens.length)
{
// Closing brace of the switch body.
if (currentIs(tok!"}"))
{
indentLevel = l;
indent();
writeToken();
newline();
return;
}
// Last statement terminator, directly followed by the closing brace.
else if (currentIs(tok!";") && peekIs(tok!"}", false))
{
writeToken();
newline();
indentLevel = l;
writeToken();
newline();
return;
}
else if (currentIs(tok!"case"))
{
writeToken();
write(" ");
}
else if (currentIs(tok!":"))
{
// ':' followed by '..' (case range): both are written with a space after.
if (peekIs(tok!".."))
{
writeToken();
write(" ");
writeToken();
write(" ");
}
// ':' followed by a label: the label and its colon go on their own line.
else if (peekIsLabel())
{
writeToken();
pushIndent();
newline();
writeToken();
writeToken();
pushIndent();
newline();
}
else
goto peek;
}
else if (currentIs(tok!"}", false))
break;
else
{
peek:
// The next token starts a new case/default or closes the body: drop back
// to the switch's own indent level before formatting the current token.
if (peekIs(tok!"case", false) || peekIs(tok!"default", false)
|| peekIs(tok!"}", false))
{
indentLevel = l;
if (peekIsLabel())
pushIndent();
}
formatStep();
}
}
indentLevel = l;
if (currentIs(tok!"}"))
{
writeToken();
newline();
}
}
/// Returns: the length of the current token as computed by tokenLength
int currentTokenLength() pure @safe @nogc
{
    immutable size_t here = index;
    return tokenLength(tokens[here]);
}
/// Returns: the length of the token after the current one as computed by
/// tokenLength, or INVALID_TOKEN_LENGTH if there is no such token
int nextTokenLength() pure @safe @nogc
{
    immutable size_t next = index + 1;
    return next < tokens.length ? tokenLength(tokens[next]) : INVALID_TOKEN_LENGTH;
}
/// Returns: a reference to the token at `index`. The in-contract requires
/// that `index` is within the token array.
ref current() const @property
in
{
    assert (tokens.length > index);
}
body
{
    return tokens[index];
}
/// Returns: the token before the current one; asserts that one exists.
const(Token) peekBack()
{
    assert (index > 0);
    immutable size_t previous = index - 1;
    return tokens[previous];
}
/// Returns: true if a previous token exists and it is an identifier or a
/// numeric, character or string literal
bool peekBackIsLiteralOrIdent()
{
    if (index == 0)
        return false;
    immutable previousType = tokens[index - 1].type;
    switch (previousType)
    {
    // identifiers
    case tok!"identifier":
    // character and string literals
    case tok!"characterLiteral":
    case tok!"stringLiteral":
    case tok!"wstringLiteral":
    case tok!"dstringLiteral":
    // integer literals
    case tok!"intLiteral":
    case tok!"uintLiteral":
    case tok!"longLiteral":
    case tok!"ulongLiteral":
    // floating point and imaginary literals
    case tok!"floatLiteral":
    case tok!"doubleLiteral":
    case tok!"realLiteral":
    case tok!"ifloatLiteral":
    case tok!"idoubleLiteral":
    case tok!"irealLiteral":
        return true;
    default:
        return false;
    }
}
/// Returns: true if a next token exists and it is an identifier or a
/// numeric, character or string literal
bool peekIsLiteralOrIdent()
{
    immutable size_t next = index + 1;
    if (next >= tokens.length)
        return false;
    immutable nextType = tokens[next].type;
    switch (nextType)
    {
    // identifiers
    case tok!"identifier":
    // character and string literals
    case tok!"characterLiteral":
    case tok!"stringLiteral":
    case tok!"wstringLiteral":
    case tok!"dstringLiteral":
    // integer literals
    case tok!"intLiteral":
    case tok!"uintLiteral":
    case tok!"longLiteral":
    case tok!"ulongLiteral":
    // floating point and imaginary literals
    case tok!"floatLiteral":
    case tok!"doubleLiteral":
    case tok!"realLiteral":
    case tok!"ifloatLiteral":
    case tok!"idoubleLiteral":
    case tok!"irealLiteral":
        return true;
    default:
        return false;
    }
}
/// Returns: true if the token one position back has type `tokenType`.
/// Comments are only skipped when `ignoreComments` is set.
bool peekBackIs(IdType tokenType, bool ignoreComments = false)
{
    enum int offset = -1;
    return peekImplementation(tokenType, offset, ignoreComments);
}
/// Returns: true if the token two positions back has type `tokenType`.
/// Comments are only skipped when `ignoreComments` is set.
bool peekBack2Is(IdType tokenType, bool ignoreComments = false)
{
    enum int offset = -2;
    return peekImplementation(tokenType, offset, ignoreComments);
}
/**
 * Tests the type of the token `n` positions away from the current one.
 *
 * Params:
 *     tokenType = the token type to test for
 *     n = offset from `index`; negative values look backwards
 *     ignoreComments = if set and `n` is not zero, comment tokens found at
 *         the target position are stepped over in the direction of `n`
 *
 * Returns: true if the position finally reached is inside the token array
 *     and holds a token of type `tokenType`
 */
bool peekImplementation(IdType tokenType, int n, bool ignoreComments = true)
{
    // `index` is unsigned, so stepping before the first token wraps around
    // to a huge value, which the bounds tests below reject.
    size_t pos = index + n;
    if (ignoreComments && n != 0)
    {
        while (pos < tokens.length && tokens[pos].type == tok!"comment")
            pos = n > 0 ? pos + 1 : pos - 1;
    }
    if (pos >= tokens.length)
        return false;
    return tokens[pos].type == tokenType;
}
/// Returns: true if the token two positions ahead has type `tokenType`.
/// Comments are skipped unless `ignoreComments` is cleared.
bool peek2Is(IdType tokenType, bool ignoreComments = true)
{
    enum int offset = 2;
    return peekImplementation(tokenType, offset, ignoreComments);
}
/// Returns: true if there is a next token and it is an operator
bool peekIsOperator()
{
    immutable size_t next = index + 1;
    if (next >= tokens.length)
        return false;
    return isOperator(tokens[next].type);
}
/// Returns: true if the next token has type `tokenType`. Comments are
/// skipped unless `ignoreComments` is cleared.
bool peekIs(IdType tokenType, bool ignoreComments = true)
{
    enum int offset = 1;
    return peekImplementation(tokenType, offset, ignoreComments);
}
/// Returns: true if a current token exists and has type `tokenType`. With an
/// offset of zero, peekImplementation never skips comments, whatever
/// `ignoreComments` is.
bool currentIs(IdType tokenType, bool ignoreComments = false)
{
    enum int offset = 0;
    return peekImplementation(tokenType, offset, ignoreComments);
}
/**
 * Computes the line on which a token ends. For comments and string literals
 * this is the start line plus the number of '\n' bytes in the token text;
 * for every other token it is the start line.
 *
 * Bugs: not unicode correct
 */
size_t tokenEndLine(const Token t)
{
    import std.algorithm : count;

    immutable bool canSpanLines = t.type == tok!"comment"
        || t.type == tok!"stringLiteral"
        || t.type == tok!"wstringLiteral"
        || t.type == tok!"dstringLiteral";
    if (!canSpanLines)
        return t.line;
    return t.line + (cast(ubyte[]) t.text).count('\n');
}
/**
 * Tests whether the token `i` positions away from the current one is one of
 * for, foreach, foreach_reverse, while, if, out, catch or with.
 *
 * Params:
 *     i = offset from `index`
 *
 * Returns: false if the position is outside the token array
 */
bool isBlockHeader(int i = 0)
{
    // The sum is unsigned: a position before the first token wraps around to
    // a huge value and is rejected by the upper bound test.
    immutable size_t pos = i + index;
    if (pos >= tokens.length)
        return false;
    switch (tokens[pos].type)
    {
    case tok!"for":
    case tok!"foreach":
    case tok!"foreach_reverse":
    case tok!"while":
    case tok!"if":
    case tok!"out":
    case tok!"catch":
    case tok!"with":
        return true;
    default:
        return false;
    }
}
/**
 * Ends the current output line and indents the next one.
 *
 * Does nothing when the current token is a comment that starts on the line
 * where the previous token ended. Writes one extra blank line when the
 * source had more than one line between the end of the previous token and
 * the current token, unless an extra newline was just added.
 */
void newline()
{
import std.range : assumeSorted;
// A comment trailing the previous token stays on the same line.
if (currentIs(tok!"comment") && current.line == tokenEndLine(tokens[index - 1]))
return;
output.put("\n");
// NOTE(review): despite the name, this is only true when at least one more
// token follows the current one (index + 1 < tokens.length) - confirm intent.
immutable bool hasCurrent = index + 1 < tokens.length;
// Keep a blank line from the source.
if (!justAddedExtraNewline && index > 0
&& hasCurrent && tokens[index].line - tokenEndLine(tokens[index - 1]) > 1)
{
output.put("\n");
}
justAddedExtraNewline = false;
currentLineLength = 0;
if (hasCurrent)
{
// A closing brace goes back one indent level and restores the temporary
// indent saved at the top of braceIndents.
if (currentIs(tok!"}"))
{
tempIndent = braceIndents.top();
indentLevel--;
}
// Lines recorded as attribute declarations, and `identifier :` lines (unless
// the token after the colon is a block header other than `if`), are
// indented one level less.
else if ((!assumeSorted(astInformation.attributeDeclarationLines)
.equalRange(current.line).empty) || (currentIs(tok!"identifier")
&& peekIs(tok!":") && (!isBlockHeader(2) || peek2Is(tok!"if"))))
{
tempIndent--;
}
indent();
}
}
/// Writes `str` to the output and adds its length to the current line length.
void write(string str)
{
    output.put(str);
    currentLineLength += str.length;
}
/**
 * Writes the current token to the output and moves on to the next token.
 * The token's own text is used when it has one, otherwise the string form of
 * its type. The token length is added to the current line length.
 */
void writeToken()
{
    currentLineLength += currentTokenLength();
    if (current.text !is null)
        output.put(current.text);
    else
        output.put(str(current.type));
    index++;
}
/**
 * Writes the indentation for the current line: one tab per level when tabs
 * are configured, otherwise `indentSize` spaces per level. The number of
 * levels is `indentLevel + tempIndent`. The current line length grows by
 * `tabSize` per tab or by one per space.
 */
void indent()
{
    immutable int levels = indentLevel + tempIndent;
    if (config.useTabs)
    {
        foreach (level; 0 .. levels)
        {
            currentLineLength += config.tabSize;
            output.put("\t");
        }
        return;
    }
    foreach (level; 0 .. levels)
    {
        foreach (column; 0 .. config.indentSize)
        {
            output.put(" ");
            currentLineLength++;
        }
    }
}
/// Current index into the tokens array
size_t index;
/// Current indent level
int indentLevel;
/// Current temporary indentation level, added to indentLevel when a line is
/// indented
int tempIndent;
/// Length of the current line (so far)
uint currentLineLength = 0;
/// Output to write output to
OutputRange output;
/// Tokens being formatted
const(Token)[] tokens;
/// Information about the AST
ASTInformation* astInformation;
/// Token indices chosen by chooseLineBreakTokens as line break positions for
/// the expression currently being formatted
size_t[] linebreakHints;
/// Values of tempIndent saved when an `if` block header is written
FixedStack ifIndents;
/// Values of tempIndent saved at each opening brace
FixedStack braceIndents;
/// Configuration
FormatterConfig* config;
/// Keep track of whether or not an extra newline was just added because of
/// an import statement.
bool justAddedExtraNewline;
}
/// The only good brace styles
enum BraceStyle
{
/// Allman style: the formatter puts an opening brace on a new line
allman,
/// One True Brace Style: the formatter keeps an opening brace on the same
/// line, preceded by a space
otbs
}
/// Configuration options for formatting
struct FormatterConfig
{
/// Number of spaces used for indentation
uint indentSize = 4;
/// Use tabs or spaces
bool useTabs = false;
/// Size of a tab character
uint tabSize = 4;
/// Soft line wrap limit
uint columnSoftLimit = 80;
/// Hard line wrap limit
uint columnHardLimit = 120;
/// Brace placement style (Allman by default)
BraceStyle braceStyle = BraceStyle.allman;
}
/// Facts collected from the AST (by FormatVisitor) that the token formatter
/// looks up while printing.
struct ASTInformation
{
    /// Sorts every array in place so that binary search works on them.
    void cleanup()
    {
        import std.algorithm : sort;

        foreach (locations; [&doubleNewlineLocations, &spaceAfterLocations,
            &unaryLocations, &attributeDeclarationLines, &caseEndLocations,
            &structInitStartLocations, &structInitEndLocations])
        {
            sort(*locations);
        }
    }

    /// Locations of closing braces that are followed by a blank line (ends
    /// of function, struct, enum, template, unittest and invariant bodies)
    size_t[] doubleNewlineLocations;

    /// Locations of tokens where a space is needed (such as the '*' in a type)
    size_t[] spaceAfterLocations;

    /// Locations of unary operators
    size_t[] unaryLocations;

    /// Lines containing attribute declarations
    size_t[] attributeDeclarationLines;

    /// Locations of the colons that end case, case range and default statements
    size_t[] caseEndLocations;

    /// Opening braces of struct initializers
    size_t[] structInitStartLocations;

    /// Closing braces of struct initializers
    size_t[] structInitEndLocations;
}
/// Collects information from the AST that is useful for the formatter.
/// Each overridden visit method records locations in the ASTInformation and
/// then continues the traversal into the node's children.
final class FormatVisitor : ASTVisitor
{
    /**
     * Params:
     *     astInformation = the structure that the collected locations are
     *         appended to
     */
    this(ASTInformation* astInformation)
    {
        this.astInformation = astInformation;
    }

    /// Records the colon of a `default:` statement.
    override void visit(const DefaultStatement defaultStatement)
    {
        astInformation.caseEndLocations ~= defaultStatement.colonLocation;
        defaultStatement.accept(this);
    }

    /// Records the colon of a `case` statement.
    override void visit(const CaseStatement caseStatement)
    {
        astInformation.caseEndLocations ~= caseStatement.colonLocation;
        caseStatement.accept(this);
    }

    /// Records the colon of a case range statement.
    override void visit(const CaseRangeStatement caseRangeStatement)
    {
        astInformation.caseEndLocations ~= caseRangeStatement.colonLocation;
        caseRangeStatement.accept(this);
    }

    /// Records the end of a function body, whether it is a plain block or a
    /// `body` block.
    override void visit(const FunctionBody functionBody)
    {
        if (functionBody.blockStatement !is null)
            astInformation.doubleNewlineLocations ~= functionBody.blockStatement.endLocation;
        if (functionBody.bodyStatement !is null && functionBody.bodyStatement.blockStatement !is null)
            astInformation.doubleNewlineLocations ~= functionBody.bodyStatement.blockStatement.endLocation;
        functionBody.accept(this);
    }

    /// Records both braces of a struct initializer.
    override void visit(const StructInitializer structInitializer)
    {
        astInformation.structInitStartLocations ~= structInitializer.startLocation;
        astInformation.structInitEndLocations ~= structInitializer.endLocation;
        structInitializer.accept(this);
    }

    /// Records the end of an enum body.
    override void visit(const EnumBody enumBody)
    {
        astInformation.doubleNewlineLocations ~= enumBody.endLocation;
        enumBody.accept(this);
    }

    /// Records the end of a unittest block.
    override void visit(const Unittest unittest_)
    {
        // Same null guard as visit(FunctionBody); presumably the block can be
        // missing after a parse error - TODO confirm against the parser.
        if (unittest_.blockStatement !is null)
            astInformation.doubleNewlineLocations ~= unittest_.blockStatement.endLocation;
        unittest_.accept(this);
    }

    /// Records the end of an invariant block.
    override void visit(const Invariant invariant_)
    {
        // Same null guard as visit(FunctionBody); presumably the block can be
        // missing after a parse error - TODO confirm against the parser.
        if (invariant_.blockStatement !is null)
            astInformation.doubleNewlineLocations ~= invariant_.blockStatement.endLocation;
        invariant_.accept(this);
    }

    /// Records the end of a struct body.
    override void visit(const StructBody structBody)
    {
        astInformation.doubleNewlineLocations ~= structBody.endLocation;
        structBody.accept(this);
    }

    /// Records the end of a template declaration.
    override void visit(const TemplateDeclaration templateDeclaration)
    {
        astInformation.doubleNewlineLocations ~= templateDeclaration.endLocation;
        templateDeclaration.accept(this);
    }

    /// Records the '*' of a type suffix, if the suffix has one.
    override void visit(const TypeSuffix typeSuffix)
    {
        if (typeSuffix.star.type != tok!"")
            astInformation.spaceAfterLocations ~= typeSuffix.star.index;
        typeSuffix.accept(this);
    }

    /// Records the prefix of a unary expression when it is one of
    /// '~', '&', '*', '+' or '-'.
    override void visit(const UnaryExpression unary)
    {
        if (unary.prefix.type == tok!"~" || unary.prefix.type == tok!"&"
            || unary.prefix.type == tok!"*" || unary.prefix.type == tok!"+"
            || unary.prefix.type == tok!"-")
        {
            astInformation.unaryLocations ~= unary.prefix.index;
        }
        unary.accept(this);
    }

    /// Records the line of an attribute declaration.
    override void visit(const AttributeDeclaration attributeDeclaration)
    {
        astInformation.attributeDeclarationLines ~= attributeDeclaration.line;
        attributeDeclaration.accept(this);
    }

private:
    ASTInformation* astInformation;
    alias visit = ASTVisitor.visit;
}
/// Length of an invalid token. Returned by tokenLength for token types it
/// has no length for, and by nextTokenLength when there is no next token.
enum int INVALID_TOKEN_LENGTH = -1;
/**
 * Generates the source text of the `case` labels that tokenLength mixes into
 * its switch statement: one `case tok!"X": return N;` per token whose text
 * is always the same, where N is the length of that text. The cases are
 * separated by a newline and a tab.
 */
string generateFixedLengthCases()
{
    import std.string : format;

    string[] keywords = ["abstract", "alias", "align", "asm", "assert",
        "auto", "body", "bool", "break", "byte", "case", "cast", "catch",
        "cdouble", "cent", "cfloat", "char", "class", "const", "continue",
        "creal", "dchar", "debug", "default", "delegate", "delete", "deprecated",
        "do", "double", "else", "enum", "export", "extern", "false", "final",
        "finally", "float", "for", "foreach", "foreach_reverse", "function",
        "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout",
        "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro",
        "mixin", "module", "new", "nothrow", "null", "out", "override", "package",
        "pragma", "private", "protected", "public", "pure", "real", "ref",
        "return", "scope", "shared", "short", "static", "struct", "super",
        "switch", "synchronized", "template", "this", "throw", "true", "try",
        "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union",
        "unittest", "ushort", "version", "void", "volatile", "wchar", "while",
        "with"];
    string[] specialTokens = ["__DATE__", "__EOF__", "__FILE__", "__FUNCTION__",
        "__gshared", "__LINE__", "__MODULE__", "__parameters",
        "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits",
        "__vector", "__VENDOR__", "__VERSION__"];
    string[] operators = [",", ".", "..", "...", "/", "/=", "!", "!<", "!<=",
        "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(",
        ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<",
        "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>",
        ">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
        "}", "~", "~="];

    string[] cases;
    foreach (text; keywords ~ specialTokens ~ operators)
        cases ~= format(`case tok!"%s": return %d;`, text, text.length);
    return cases.join("\n\t");
}
/**
 * Computes the printed length of a token.
 *
 * Numeric and character literals count their whole text. Identifiers and
 * string literals count only up to the first '\n' in their text. Tokens with
 * a fixed spelling use the cases produced by generateFixedLengthCases.
 *
 * Returns: the length, or INVALID_TOKEN_LENGTH for any other token type
 */
int tokenLength(ref const Token t) pure @safe @nogc
{
    import std.algorithm : countUntil;

    switch (t.type)
    {
    case tok!"identifier":
    case tok!"stringLiteral":
    case tok!"wstringLiteral":
    case tok!"dstringLiteral":
        // TODO: Unicode line breaks and old-Mac line endings
        immutable firstNewline = cast(int) t.text.countUntil('\n');
        return firstNewline == -1 ? cast(int) t.text.length : firstNewline;
    case tok!"characterLiteral":
    case tok!"intLiteral":
    case tok!"uintLiteral":
    case tok!"longLiteral":
    case tok!"ulongLiteral":
    case tok!"floatLiteral":
    case tok!"doubleLiteral":
    case tok!"realLiteral":
    case tok!"ifloatLiteral":
    case tok!"idoubleLiteral":
    case tok!"irealLiteral":
        return cast(int) t.text.length;
    mixin (generateFixedLengthCases());
    default:
        return INVALID_TOKEN_LENGTH;
    }
}
/// Returns: true if `t` is a token type at which validMoves may place a
/// line break
bool isBreakToken(IdType t)
{
    switch (t)
    {
    // logical operators
    case tok!"||":
    case tok!"&&":
    // openers, separators and member access
    case tok!"(":
    case tok!"[":
    case tok!",":
    case tok!".":
    // assignment operators
    case tok!"=":
    case tok!"+=":
    case tok!"-=":
    case tok!"*=":
    case tok!"/=":
    case tok!"%=":
    case tok!"&=":
    case tok!"|=":
    case tok!"^=":
    case tok!"~=":
    case tok!"<<=":
    case tok!">>=":
    case tok!">>>=":
    // comparison operators
    case tok!"==":
    case tok!"!=":
    case tok!"<":
    case tok!"<=":
    case tok!">":
    case tok!">=":
    case tok!"<>":
    case tok!"<>=":
    case tok!"!<":
    case tok!"!<=":
    case tok!"!>":
    case tok!"!>=":
    case tok!"!<>":
    case tok!"!<>=":
    // arithmetic, bitwise and remaining operators
    case tok!"+":
    case tok!"-":
    case tok!"/":
    case tok!"%":
    case tok!"^^":
    case tok!"^":
    case tok!"|":
    case tok!"~":
    case tok!"<<":
    case tok!">>":
    case tok!">>>":
    case tok!"?":
    case tok!"..":
    case tok!"=>":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: the cost of placing a line break at a token of type `t`: 0 for
 * `||` and `&&`, 10 for `[`, `(` and `,`, 200 for `.`, 100 for the other
 * operators listed below, and 1000 for every other token type
 */
int breakCost(IdType t)
{
    enum int logicalCost = 0;
    enum int separatorCost = 10;
    enum int operatorCost = 100;
    enum int memberAccessCost = 200;
    enum int otherCost = 1000;

    switch (t)
    {
    case tok!"||":
    case tok!"&&":
        return logicalCost;
    case tok!"[":
    case tok!"(":
    case tok!",":
        return separatorCost;
    case tok!".":
        return memberAccessCost;
    // assignment operators
    case tok!"=":
    case tok!"+=":
    case tok!"-=":
    case tok!"*=":
    case tok!"/=":
    case tok!"%=":
    case tok!"&=":
    case tok!"|=":
    case tok!"^=":
    case tok!"~=":
    case tok!"<<=":
    case tok!">>=":
    case tok!">>>=":
    // comparison operators
    case tok!"==":
    case tok!"!=":
    case tok!"<":
    case tok!"<=":
    case tok!">":
    case tok!">=":
    case tok!"<>":
    case tok!"<>=":
    case tok!"!<":
    case tok!"!<=":
    case tok!"!>":
    case tok!"!>=":
    case tok!"!<>":
    case tok!"!<>=":
    // arithmetic, bitwise and remaining operators
    case tok!"+":
    case tok!"-":
    case tok!"/":
    case tok!"%":
    case tok!"^^":
    case tok!"^":
    case tok!"|":
    case tok!"~":
    case tok!"<<":
    case tok!">>":
    case tok!">>>":
    case tok!"?":
    case tok!"..":
    case tok!"=>":
        return operatorCost;
    default:
        return otherCost;
    }
}
/**
 * One candidate in the line break search: a set of token positions at which
 * the line would be broken, with its cost and whether it makes every
 * resulting line fit within the soft column limit.
 */
struct State
{
    /**
     * Params:
     *     breaks = positions in `tokens` at which a line break is placed
     *     tokens = the tokens of the expression being laid out
     *     depth = search depth of this state; adds 500 to the cost per level
     *     formatterConfig = supplies the soft column limit and indent size
     *     currentLineLength = length of the line before the first token
     *     indentLevel = indent level of the line; continuation lines are
     *         assumed to start at `(indentLevel + 1) * indentSize`
     */
    this(size_t[] breaks, const Token[] tokens, int depth,
        const FormatterConfig* formatterConfig, int currentLineLength,
        int indentLevel)
    {
        this.breaks = breaks;
        this._depth = depth;
        import std.algorithm : map, sum;
        this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum()
            + (depth * 500);
        int ll = currentLineLength;
        size_t breakIndex = 0;
        size_t i = 0;
        bool s = true;
        if (breaks.length == 0)
        {
            // No breaks at all: the most expensive state, solved only if
            // everything fits on the current line.
            _cost = int.max;
            immutable int l = currentLineLength + tokens.map!(a => tokenLength(a)).sum();
            s = l < formatterConfig.columnSoftLimit;
        }
        else
        {
            // Walk the segments between consecutive breaks and check that
            // each one fits within the soft limit.
            do
            {
                immutable size_t j = breakIndex < breaks.length
                    ? breaks[breakIndex] : tokens.length;
                ll += tokens[i .. j].map!(a => tokenLength(a)).sum();
                if (ll > formatterConfig.columnSoftLimit)
                {
                    s = false;
                    break;
                }
                i = j;
                ll = (indentLevel + 1) * formatterConfig.indentSize;
                breakIndex++;
            }
            while (i + 1 < tokens.length);
        }
        this._solved = s;
    }

    /// Returns: the cost of this state; lower is better
    int cost() const pure nothrow @safe @property
    {
        return _cost;
    }

    /// Returns: the search depth given to the constructor
    int depth() const pure nothrow @safe @property
    {
        return _depth;
    }

    /// Returns: non-zero if every line produced by this state fits within
    /// the soft column limit
    int solved() const pure nothrow @safe @property
    {
        return _solved;
    }

    /**
     * Orders states by cost. Among states of equal cost, a state sorts first
     * if its first break comes later than the other's, or if it is solved
     * and the other is not.
     *
     * Returns: -1 if this state sorts before `other`, 1 if its cost is
     *     higher, and 0 otherwise
     */
    int opCmp(ref const State other) const pure nothrow @safe
    {
        if (cost < other.cost || (cost == other.cost && ((breaks.length
            && other.breaks.length && breaks[0] > other.breaks[0]) || (_solved
            && !other.solved))))
        {
            return -1;
        }
        // The previous `other.cost > _cost` could never be true at this
        // point, so a more expensive state compared as equal, not greater.
        return cost > other.cost;
    }

    /// Two states are equal when they contain the same breaks.
    bool opEquals(ref const State other) const pure nothrow @safe
    {
        return other.breaks == breaks;
    }

    /// Hashes the breaks array, matching opEquals.
    size_t toHash() const nothrow @safe
    {
        return typeid(breaks).getHash(&breaks);
    }

    /// Positions in the token slice at which a line break is placed
    size_t[] breaks;

private:
    int _cost;
    int _depth;
    bool _solved;
}
/**
 * Searches for a set of line break positions for an expression.
 *
 * Candidate states are kept in a tree ordered by State.opCmp and expanded
 * cheapest first. The first state whose lines all fit within the soft column
 * limit is returned. Only the first ALGORITHMIC_COMPLEXITY_SUCKS tokens are
 * considered. The garbage collector is disabled for the duration of the
 * search and re-enabled on exit.
 *
 * Params:
 *     index = index of `tokens[0]` in the formatter's full token array;
 *         added to every returned position
 *     tokens = the tokens of the expression
 *     formatterConfig = formatting options
 *     currentLineLength = length of the line before the expression
 *     indentLevel = current indent level
 *
 * Returns: the chosen break positions, offset by `index`
 */
size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens,
    const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel)
{
    import std.container.rbtree : RedBlackTree;
    import std.algorithm : min;
    import core.memory : GC;

    enum ALGORITHMIC_COMPLEXITY_SUCKS = 25;
    immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS);
    int depth = 0;
    auto open = new RedBlackTree!State;
    open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth,
        formatterConfig, currentLineLength, indentLevel));
    // NOTE(review): `lowest` starts as State.init, whose cost is 0, and no
    // constructed State appears to have a negative cost. The comparison in
    // the loop therefore seems never to succeed, and the fallback below
    // returns an empty array. Left unchanged because fixing it would alter
    // the formatter's output - confirm the intended behaviour.
    State lowest;
    GC.disable();
    scope(exit) GC.enable();
    while (!open.empty)
    {
        State current = open.front();
        if (current.cost < lowest.cost)
            lowest = current;
        open.removeFront();
        if (current.solved)
        {
            // Translate positions in the slice to positions in the full
            // token array.
            current.breaks[] += index;
            return current.breaks;
        }
        foreach (next; validMoves(tokens[0 .. tokensEnd], current,
            formatterConfig, currentLineLength, indentLevel, depth))
        {
            open.insert(next);
        }
    }
    // The loop only ends once `open` is empty, so no solved state exists.
    // The former scan of `open` for solved states and the trailing
    // assert (false) could never run and have been removed.
    lowest.breaks[] += index;
    return lowest.breaks;
}
/**
 * Generates the successors of a search state: for every break token in
 * `tokens` whose position is not already one of the state's breaks, a new
 * state that has that position added to the existing breaks, kept sorted.
 *
 * Params:
 *     tokens = the tokens of the expression
 *     current = the state to expand
 *     formatterConfig = formatting options, passed on to each new state
 *     currentLineLength = passed on to each new state
 *     indentLevel = passed on to each new state
 *     depth = depth of `current`; new states are created with `depth + 1`
 */
State[] validMoves(const Token[] tokens, ref const State current,
    const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel,
    int depth)
{
    import std.algorithm : sort, canFind;

    State[] successors;
    foreach (position, token; tokens)
    {
        immutable bool usable = isBreakToken(token.type)
            && !current.breaks.canFind(position);
        if (!usable)
            continue;
        size_t[] candidate = current.breaks.dup;
        candidate ~= position;
        sort(candidate);
        successors ~= State(candidate, tokens, depth + 1, formatterConfig,
            currentLineLength, indentLevel);
    }
    return successors;
}
/**
 * A stack of ints with fixed storage that never fails.
 *
 * Slot 0 of the storage is never written, so `top` on an empty stack yields
 * 0. Popping an empty stack does nothing. Once 255 values are stored, a
 * further push overwrites the topmost value.
 */
struct FixedStack
{
    /// Pushes `i`, or overwrites the top value when the stack is full.
    void push(int i)
    {
        if (index != 255)
            index++;
        arr[index] = i;
    }

    /// Removes the top value, if there is one.
    void pop()
    {
        if (index != 0)
            index--;
    }

    /// Returns: the top value, or 0 when the stack is empty
    int top()
    {
        return arr[index];
    }

    /// Returns: the number of values on the stack
    size_t length()
    {
        return index;
    }

private:
    size_t index;
    int[256] arr;
}
// Checks chooseLineBreakTokens on a long parameter list: with the default
// configuration and an empty, unindented line, exactly one break is expected,
// at token index 15.
unittest
{
import std.string : format;
auto sourceCode = q{const Token[] tokens, ref const State current, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth};
LexerConfig config;
config.stringBehavior = StringBehavior.source;
config.whitespaceBehavior = WhitespaceBehavior.skip;
StringCache cache = StringCache(StringCache.defaultBucketCount);
auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array();
FormatterConfig formatterConfig;
auto result = chooseLineBreakTokens(0, tokens, &formatterConfig, 0, 0);
// On failure the assert message shows the breaks that were actually chosen.
assert ([15] == result, "%s".format(result));
}