dfmt/src/dfmt.d

933 lines
27 KiB
D

/*******************************************************************************
* Boost Software License - Version 1.0 - August 17th, 2003
*
* Permission is hereby granted, free of charge, to any person or organization
* obtaining a copy of the software and accompanying documentation covered by
* this license (the "Software") to use, reproduce, display, distribute,
* execute, and transmit the Software, and to prepare derivative works of the
* Software, and to permit third-parties to whom the Software is furnished to
* do so, all subject to the following:
*
* The copyright notices in the Software and this entire statement, including
* the above license grant, this restriction and the following disclaimer,
* must be included in all copies of the Software, in whole or in part, and
* all derivative works of the Software, unless such copies or derivative
* works are solely in the form of machine-executable object code generated by
* a source language processor.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
******************************************************************************/
module dfmt;
import std.stdio;
import std.d.lexer;
import std.d.parser;
import std.d.formatter;
import std.d.ast;
import std.array;
immutable USAGE = "usage: %s [--inplace] [<path>...]
Formats D code.
--inplace change file in-place instead of outputing to stdout
(implicit in case of multiple files)
-h, --help display this help and exit
";
version (NoMain)
{ }
else
int main(string[] args)
{
import std.getopt;
bool inplace = false;
bool show_usage = false;
getopt(args,
"help|h", &show_usage,
"inplace", &inplace);
if (show_usage)
{
import std.path: baseName;
writef(USAGE, baseName(args[0]));
return 0;
}
File output = stdout;
ubyte[] buffer;
args.popFront();
if (args.length == 0)
{
ubyte[4096] inputBuffer;
ubyte[] b;
while (true)
{
b = stdin.rawRead(inputBuffer);
if (b.length)
buffer ~= b;
else
break;
}
format("stdin", buffer, output.lockingTextWriter());
}
else
{
import std.file;
if (args.length >= 2)
inplace = true;
while (args.length > 0)
{
const path = args.front;
args.popFront();
if (isDir(path))
{
inplace = true;
foreach (string name; dirEntries(path, "*.d", SpanMode.depth))
{
args ~= name;
}
continue;
}
File f = File(path);
buffer = new ubyte[](cast(size_t)f.size);
f.rawRead(buffer);
if (inplace)
output = File(path, "w");
format(path, buffer, output.lockingTextWriter());
}
}
return 0;
}
void format(OutputRange)(string source_desc, ubyte[] buffer, OutputRange output)
{
LexerConfig config;
config.stringBehavior = StringBehavior.source;
config.whitespaceBehavior = WhitespaceBehavior.skip;
LexerConfig parseConfig;
parseConfig.stringBehavior = StringBehavior.source;
parseConfig.whitespaceBehavior = WhitespaceBehavior.skip;
StringCache cache = StringCache(StringCache.defaultBucketCount);
ASTInformation astInformation;
FormatterConfig formatterConfig;
auto parseTokens = getTokensForParser(buffer, parseConfig, &cache);
auto mod = parseModule(parseTokens, source_desc);
auto visitor = new FormatVisitor(&astInformation);
visitor.visit(mod);
astInformation.cleanup();
auto tokens = byToken(buffer, config, &cache).array();
auto tokenFormatter = TokenFormatter!OutputRange(tokens, output, &astInformation,
&formatterConfig);
tokenFormatter.format();
}
struct TokenFormatter(OutputRange)
{
this(const(Token)[] tokens, OutputRange output, ASTInformation* astInformation,
FormatterConfig* config)
{
this.tokens = tokens;
this.output = output;
this.astInformation = astInformation;
this.config = config;
}
void format()
{
while (index < tokens.length)
formatStep();
}
invariant
{
assert (indentLevel >= 0);
}
private:
void formatStep()
{
import std.range:assumeSorted;
assert (index < tokens.length);
if (current.type == tok!"comment")
{
const i = index;
if (i > 0)
{
if (tokens[i-1].line < tokens[i].line)
{
if (tokens[i-1].type != tok!"comment"
&& tokens[i-1].type != tok!"{")
newline();
}
else
write(" ");
}
writeToken();
if (i >= tokens.length-1)
newline();
else if (tokens[i+1].line > tokens[i].line)
newline();
else if (tokens[i+1].type != tok!"{")
write(" ");
}
else if (isStringLiteral(current.type) || isNumberLiteral(current.type)
|| current.type == tok!"characterLiteral")
{
writeToken();
}
else if (current.type == tok!"module" || current.type == tok!"import")
{
auto t = current.type;
writeToken();
write(" ");
while (index < tokens.length)
{
if (current.type == tok!";")
{
writeToken();
tempIndent = 0;
if (!(t == tok!"import" && current.type == tok!"import"))
write("\n");
newline();
break;
}
else if (current.type == tok!",")
{
// compute length until next , or ;
int length_of_next_chunk = INVALID_TOKEN_LENGTH;
for (size_t i=index+1; i<tokens.length; i++)
{
if (tokens[i].type == tok!"," || tokens[i].type == tok!";")
break;
const len = tokenLength(i);
assert (len >= 0);
length_of_next_chunk += len;
}
assert (length_of_next_chunk > 0);
writeToken();
if (currentLineLength+1+length_of_next_chunk >= config.columnSoftLimit)
{
pushIndent();
newline();
}
else
write(" ");
}
else
formatStep();
}
}
else if (current.type == tok!"return")
{
writeToken();
write(" ");
}
else if (current.type == tok!"switch")
formatSwitch();
else if (current.type == tok!"for" || current.type == tok!"foreach"
|| current.type == tok!"foreach_reverse" || current.type == tok!"while"
|| current.type == tok!"if")
{
currentLineLength += currentTokenLength() + 1;
writeToken();
write(" ");
writeParens(false);
if (current.type != tok!"{" && current.type != tok!";")
{
pushIndent();
newline();
}
}
else if (isKeyword(current.type))
{
switch (current.type)
{
case tok!"default":
case tok!"cast":
writeToken();
break;
case tok!"mixin":
writeToken();
write(" ");
break;
default:
if (index + 1 < tokens.length)
{
auto next = tokens[index + 1];
if (next.type == tok!";" || next.type == tok!"("
|| next.type == tok!")" || next.type == tok!","
|| next.type == tok!"{" || next.type == tok!".")
{
writeToken();
}
else
{
writeToken();
write(" ");
}
}
else
writeToken();
break;
}
}
else if (isBasicType(current.type))
{
writeToken();
if (current.type == tok!"identifier" || isKeyword(current.type))
write(" ");
}
else if (isOperator(current.type))
{
switch (current.type)
{
case tok!"*":
if (!assumeSorted(astInformation.spaceAfterLocations)
.equalRange(current.index).empty)
{
writeToken();
write(" ");
break;
}
goto case;
case tok!"~":
case tok!"&":
case tok!"+":
case tok!"-":
if (!assumeSorted(astInformation.unaryLocations)
.equalRange(current.index).empty)
{
writeToken();
break;
}
goto binary;
case tok!"(":
writeParens(true);
break;
case tok!":":
if (!assumeSorted(astInformation.ternaryColonLocations)
.equalRange(current.index).empty)
{
write(" ");
writeToken();
write(" ");
}
else
writeToken();
break;
case tok!"@":
case tok!"!":
case tok!"...":
case tok!"[":
case tok!"++":
case tok!"--":
case tok!"$":
writeToken();
break;
case tok!"]":
writeToken();
if (current.type == tok!"identifier")
write(" ");
break;
case tok!";":
tempIndent = 0;
writeToken();
if (current.type != tok!"comment")
newline();
break;
case tok!"{":
writeBraces();
break;
case tok!".":
if (currentLineLength + nextTokenLength() >= config.columnSoftLimit)
{
pushIndent();
newline();
writeToken();
}
else
writeToken();
break;
case tok!",":
if (currentLineLength + nextTokenLength() >= config.columnSoftLimit)
{
pushIndent();
writeToken();
newline();
}
else
{
writeToken();
write(" ");
}
break;
case tok!"^^":
case tok!"^=":
case tok!"^":
case tok!"~=":
case tok!"<<=":
case tok!"<<":
case tok!"<=":
case tok!"<>=":
case tok!"<>":
case tok!"<":
case tok!"==":
case tok!"=>":
case tok!"=":
case tok!">=":
case tok!">>=":
case tok!">>>=":
case tok!">>>":
case tok!">>":
case tok!">":
case tok!"|=":
case tok!"||":
case tok!"|":
case tok!"-=":
case tok!"!<=":
case tok!"!<>=":
case tok!"!<>":
case tok!"!<":
case tok!"!=":
case tok!"!>=":
case tok!"!>":
case tok!"?":
case tok!"/=":
case tok!"/":
case tok!"..":
case tok!"*=":
case tok!"&=":
case tok!"&&":
case tok!"%=":
case tok!"%":
case tok!"+=":
binary:
if (currentLineLength + nextTokenLength() >= config.columnSoftLimit)
{
pushIndent();
newline();
}
else
write(" ");
writeToken();
write(" ");
break;
default:
assert (false, str(current.type));
}
}
else if (current.type == tok!"identifier")
{
writeToken();
if (current.type == tok!"identifier" || isKeyword(current.type)
|| current.type == tok!"@")
write(" ");
}
else
assert (false, str(current.type));
}
/// Pushes a temporary indent level
void pushIndent()
{
if (tempIndent == 0)
tempIndent++;
}
/// Pops a temporary indent level
void popIndent()
{
if (tempIndent > 0)
tempIndent--;
}
/// Writes balanced braces
void writeBraces()
{
import std.range : assumeSorted;
int depth = 0;
do
{
if (current.type == tok!"{")
{
depth++;
if (config.braceStyle == BraceStyle.otbs)
{
write(" ");
write("{");
}
else
{
newline();
write("{");
}
indentLevel++;
index++;
newline();
}
else if (current.type == tok!"}")
{
// Silly hack to format enums better.
if (peekBackIs(tok!"identifier"))
newline();
write("}");
depth--;
if (index < tokens.length-1 &&
assumeSorted(astInformation.doubleNewlineLocations)
.equalRange(tokens[index].index).length)
{
output.put("\n");
}
if (config.braceStyle == BraceStyle.otbs)
{
index++;
if (index < tokens.length && current.type == tok!"else")
write(" ");
else
{
if (peekIs(tok!"case") || peekIs(tok!"default"))
indentLevel--;
newline();
}
}
else
{
index++;
if (peekIs(tok!"case") || peekIs(tok!"default"))
indentLevel--;
newline();
}
}
else
formatStep();
}
while (index < tokens.length && depth > 0);
popIndent();
}
void writeParens(bool space_afterwards)
in
{
assert (current.type == tok!"(", str(current.type));
}
body
{
immutable t = tempIndent;
int depth = 0;
do
{
if (current.type == tok!";")
{
write("; ");
currentLineLength += 2;
index++;
continue;
}
else if (current.type == tok!"(")
{
writeToken();
depth++;
continue;
}
else if (current.type == tok!")")
{
if (peekIs(tok!"identifier") || (index + 1 < tokens.length
&& isKeyword(tokens[index + 1].type)))
{
writeToken();
if (space_afterwards)
write(" ");
}
else
writeToken();
depth--;
}
else
formatStep();
}
while (index < tokens.length && depth > 0);
popIndent();
tempIndent = t;
}
bool peekIsLabel()
{
return peekIs(tok!"identifier") && peek2Is(tok!":");
}
void formatSwitch()
{
immutable l = indentLevel;
writeToken(); // switch
write(" ");
writeParens(true);
if (current.type != tok!"{")
return;
if (config.braceStyle == BraceStyle.otbs)
write(" ");
else
newline();
writeToken();
newline();
while (index < tokens.length)
{
if (current.type == tok!"case")
{
writeToken();
write(" ");
}
else if (current.type == tok!":")
{
if (peekIs(tok!".."))
{
writeToken();
write(" ");
writeToken();
write(" ");
}
else
{
if (!(peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel()))
indentLevel++;
formatStep();
newline();
}
}
else
{
assert (current.type != tok!"}");
if (peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel())
{
indentLevel = l;
formatStep();
}
else
{
formatStep();
if (current.type == tok!"}")
break;
}
}
}
indentLevel = l;
assert (current.type == tok!"}");
writeToken();
newline();
}
int tokenLength(size_t i) pure @safe @nogc
{
import std.algorithm : countUntil;
assert (i+1 <= tokens.length);
switch (tokens[i].type)
{
case tok!"identifier":
case tok!"stringLiteral":
case tok!"wstringLiteral":
case tok!"dstringLiteral":
auto c = cast(int) tokens[i].text.countUntil('\n');
if (c == -1)
return cast(int) tokens[i].text.length;
mixin (generateFixedLengthCases());
default: return INVALID_TOKEN_LENGTH;
}
}
int currentTokenLength() pure @safe @nogc
{
return tokenLength(index);
}
int nextTokenLength() pure @safe @nogc
{
if (index + 1 >= tokens.length)
return INVALID_TOKEN_LENGTH;
return tokenLength(index + 1);
}
ref current() const @property
in
{
assert (index < tokens.length);
}
body
{
return tokens[index];
}
bool peekBackIs(IdType tokenType)
{
return (index >= 1) && tokens[index - 1].type == tokenType;
}
bool peekImplementation(IdType tokenType, size_t n)
{
auto i = index + n;
while (i < tokens.length && tokens[i].type == tok!"comment")
i++;
return i < tokens.length && tokens[i].type == tokenType;
}
bool peek2Is(IdType tokenType)
{
return peekImplementation(tokenType, 2);
}
bool peekIs(IdType tokenType)
{
return peekImplementation(tokenType, 1);
}
void newline()
{
output.put("\n");
currentLineLength = 0;
if (index < tokens.length)
{
if (current.type == tok!"}")
indentLevel--;
indent();
}
}
void write(string str)
{
currentLineLength += str.length;
output.put(str);
}
void writeToken()
{
currentLineLength += currentTokenLength();
if (current.text is null)
output.put(str(current.type));
else
output.put(current.text);
index++;
}
void indent()
{
import std.range : repeat, take;
if (config.useTabs)
foreach (i; 0 .. indentLevel + tempIndent)
{
currentLineLength += config.tabSize;
output.put("\t");
}
else
foreach (i; 0 .. indentLevel + tempIndent)
foreach (j; 0 .. config.indentSize)
{
output.put(" ");
currentLineLength++;
}
}
/// Length of an invalid token
enum int INVALID_TOKEN_LENGTH = -1;
/// Current index into the tokens array
size_t index;
/// Current indent level
int indentLevel;
/// Current temproray indententation level;
int tempIndent;
/// Length of the current line (so far)
uint currentLineLength = 0;
/// Output to write output to
OutputRange output;
/// Tokens being formatted
const(Token)[] tokens;
/// Information about the AST
ASTInformation* astInformation;
/// Configuration
FormatterConfig* config;
}
/// The only good brace styles
enum BraceStyle
{
allman,
otbs
}
/// Configuration options for formatting
struct FormatterConfig
{
/// Number of spaces used for indentation
uint indentSize = 4;
/// Use tabs or spaces
bool useTabs = false;
/// Size of a tab character
uint tabSize = 8;
/// Soft line wrap limit
uint columnSoftLimit = 80;
/// Hard line wrap limit
uint columnHardLimit = 120;
/// Use the One True Brace Style
BraceStyle braceStyle = BraceStyle.allman;
}
///
struct ASTInformation
{
/// Sorts the arrays so that binary search will work on them
void cleanup()
{
import std.algorithm : sort;
sort(doubleNewlineLocations);
sort(spaceAfterLocations);
sort(unaryLocations);
}
/// Locations of end braces for struct bodies
size_t[] doubleNewlineLocations;
/// Locations of tokens where a space is needed (such as the '*' in a type)
size_t[] spaceAfterLocations;
/// Locations of unary operators
size_t[] unaryLocations;
/// Locations of ':' operators in ternary expressions
size_t[] ternaryColonLocations;
}
/// Collects information from the AST that is useful for the formatter
final class FormatVisitor : ASTVisitor
{
///
this(ASTInformation* astInformation)
{
this.astInformation = astInformation;
}
override void visit(const FunctionBody functionBody)
{
if (functionBody.blockStatement !is null)
astInformation.doubleNewlineLocations ~= functionBody.blockStatement.endLocation;
if (functionBody.inStatement !is null && functionBody.inStatement.blockStatement !is null)
astInformation.doubleNewlineLocations ~= functionBody.inStatement.blockStatement.endLocation;
if (functionBody.outStatement !is null && functionBody.outStatement.blockStatement !is null)
astInformation.doubleNewlineLocations ~= functionBody.outStatement.blockStatement.endLocation;
if (functionBody.bodyStatement !is null && functionBody.bodyStatement.blockStatement !is null)
astInformation.doubleNewlineLocations ~= functionBody.bodyStatement.blockStatement.endLocation;
functionBody.accept(this);
}
override void visit(const EnumBody enumBody)
{
astInformation.doubleNewlineLocations ~= enumBody.endLocation;
enumBody.accept(this);
}
override void visit(const Unittest unittest_)
{
astInformation.doubleNewlineLocations ~= unittest_.blockStatement.endLocation;
unittest_.accept(this);
}
override void visit(const Invariant invariant_)
{
astInformation.doubleNewlineLocations ~= invariant_.blockStatement.endLocation;
invariant_.accept(this);
}
override void visit(const StructBody structBody)
{
astInformation.doubleNewlineLocations ~= structBody.endLocation;
structBody.accept(this);
}
override void visit(const TemplateDeclaration templateDeclaration)
{
astInformation.doubleNewlineLocations ~= templateDeclaration.endLocation;
templateDeclaration.accept(this);
}
override void visit(const TypeSuffix typeSuffix)
{
if (typeSuffix.star.type != tok!"")
astInformation.spaceAfterLocations ~= typeSuffix.star.index;
typeSuffix.accept(this);
}
override void visit(const UnaryExpression unary)
{
if (unary.prefix.type == tok!"~" || unary.prefix.type == tok!"&"
|| unary.prefix.type == tok!"*" || unary.prefix.type == tok!"+"
|| unary.prefix.type == tok!"-")
{
astInformation.unaryLocations ~= unary.prefix.index;
}
unary.accept(this);
}
override void visit(const TernaryExpression ternary)
{
if (ternary.colon.type != tok!"")
astInformation.ternaryColonLocations ~= ternary.colon.index;
ternary.accept(this);
}
private:
ASTInformation* astInformation;
alias visit = ASTVisitor.visit;
}
string generateFixedLengthCases()
{
import std.algorithm:map;
import std.string:format;
string[] fixedLengthTokens = [
"abstract", "alias", "align", "asm", "assert", "auto", "body", "bool",
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
"char", "class", "const", "continue", "creal", "dchar", "debug", "default",
"delegate", "delete", "deprecated", "do", "double", "else", "enum",
"export", "extern", "false", "final", "finally", "float", "for", "foreach",
"foreach_reverse", "function", "goto", "idouble", "if", "ifloat",
"immutable", "import", "in", "inout", "int", "interface", "invariant",
"ireal", "is", "lazy", "long", "macro", "mixin", "module", "new", "nothrow",
"null", "out", "override", "package", "pragma", "private", "protected",
"public", "pure", "real", "ref", "return", "scope", "shared", "short",
"static", "struct", "super", "switch", "synchronized", "template", "this",
"throw", "true", "try", "typedef", "typeid", "typeof", "ubyte", "ucent",
"uint", "ulong", "union", "unittest", "ushort", "version", "void",
"volatile", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__",
"__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", "__vector",
"__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<",
"!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=",
"(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<",
"<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>",
">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
"}", "~", "~="
];
return fixedLengthTokens.map!(a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t");
}