From a886dc5cd0a2a28b8cf2301fb8be57783f51640e Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Wed, 18 Feb 2015 17:56:39 -0800 Subject: [PATCH 1/8] Silly experiment --- makefile | 2 +- src/dfmt.d | 329 +++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 282 insertions(+), 49 deletions(-) diff --git a/makefile b/makefile index deede85..1508c7a 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,6 @@ SRC := $(shell find src -name "*.d") $(shell find libdparse/src -name "*.d") INCLUDE_PATHS := -Ilibdparse/src -DMD_FLAGS := -g -w $(INCLUDE_PATHS) +DMD_FLAGS := -g -w -unittest $(INCLUDE_PATHS) LDC_FLAGS := -g -w -oq $(INCLUDE_PATHS) .PHONY: dmd ldc test diff --git a/src/dfmt.d b/src/dfmt.d index e44d502..a8ba68b 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -233,7 +233,7 @@ private: { if (tokens[i].type == tok!"," || tokens[i].type == tok!";") break; - const len = tokenLength(i); + const len = tokenLength(tokens[i]); assert (len >= 0); length_of_next_chunk += len; } @@ -397,7 +397,7 @@ private: break; case tok!",": writeToken(); - if (currentLineLength + expressionLength() >= config.columnSoftLimit) + if (currentLineLength + distanceToNextPreferredBreak() >= config.columnSoftLimit) { pushIndent(); newline(); @@ -405,6 +405,18 @@ private: else write(" "); break; + case tok!"=": + case tok!">=": + case tok!">>=": + case tok!">>>=": + case tok!"|=": + case tok!"-=": + case tok!"/=": + case tok!"*=": + case tok!"&=": + case tok!"%=": + case tok!"+=": + goto case; case tok!"^^": case tok!"^=": case tok!"^": @@ -417,17 +429,11 @@ private: case tok!"<": case tok!"==": case tok!"=>": - case tok!"=": - case tok!">=": - case tok!">>=": - case tok!">>>=": case tok!">>>": case tok!">>": case tok!">": - case tok!"|=": case tok!"||": case tok!"|": - case tok!"-=": case tok!"!<=": case tok!"!<>=": case tok!"!<>": @@ -436,15 +442,10 @@ private: case tok!"!>=": case tok!"!>": case tok!"?": - case tok!"/=": case tok!"/": case tok!"..": - case tok!"*=": - case tok!"&=": - case tok!"&&": - case tok!"%=": case tok!"%": - case tok!"+=": + case tok!"&&": binary: if (currentLineLength + distanceToNextPreferredBreak() >= config.columnSoftLimit) { @@ -488,33 +489,29 @@ private: tempIndent--; } - size_t expressionLength() const pure @safe @nogc + size_t expressionEndIndex() const pure @safe @nogc { size_t i = index; - size_t l = 0; int parenDepth = 0; loop: while (i < tokens.length) switch (tokens[i].type) { case tok!"(": parenDepth++; - l++; i++; break; case tok!")": parenDepth--; if (parenDepth <= 0) break loop; - l++; i++; break; case tok!";": - case tok!",": break loop; default: - l += tokenLength(i); i++; + break; } - return l; + return i; } /// Writes balanced braces @@ -709,30 +706,9 @@ private: newline(); } - int tokenLength(size_t i) const pure @safe @nogc - { - import std.algorithm : countUntil; - - assert(i + 1 <= tokens.length); - switch (tokens[i].type) - { - case tok!"identifier": - case tok!"stringLiteral": - case tok!"wstringLiteral": - case tok!"dstringLiteral": - // TODO: Unicode line breaks and old-Mac line endings - auto c = cast(int) tokens[i].text.countUntil('\n'); - if (c == -1) - return cast(int) tokens[i].text.length; - mixin (generateFixedLengthCases()); - default : - return INVALID_TOKEN_LENGTH; - } - } - int currentTokenLength() pure @safe @nogc { - return tokenLength(index); + return tokenLength(tokens[index]); } int nextTokenLength() pure @safe @nogc @@ -740,7 +716,7 @@ private: immutable size_t i = index + 1; if (i >= tokens.length) return INVALID_TOKEN_LENGTH; - return tokenLength(i); + return tokenLength(tokens[i]); } int distanceToNextPreferredBreak() pure @safe @nogc @@ -757,7 +733,7 @@ private: case tok!"(": break loop; default: - l += tokenLength(i); + l += tokenLength(tokens[i]); i++; break; } @@ -849,9 +825,6 @@ private: } } - /// Length of an invalid token - enum int INVALID_TOKEN_LENGTH = -1; - /// Current index into the tokens array size_t index; @@ -1009,6 +982,9 @@ private: alias visit = ASTVisitor.visit; } +/// Length of an invalid token +enum int INVALID_TOKEN_LENGTH = -1; + string generateFixedLengthCases() { import std.algorithm : map; @@ -1040,3 +1016,260 @@ string generateFixedLengthCases() return fixedLengthTokens.map!(a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t"); } + +int tokenLength(ref const Token t) pure @safe @nogc +{ + import std.algorithm : countUntil; + switch (t.type) + { + case tok!"identifier": + case tok!"stringLiteral": + case tok!"wstringLiteral": + case tok!"dstringLiteral": + // TODO: Unicode line breaks and old-Mac line endings + auto c = cast(int) t.text.countUntil('\n'); + if (c == -1) + return cast(int) t.text.length; + mixin (generateFixedLengthCases()); + default: + return INVALID_TOKEN_LENGTH; + } +} + +bool isBreakToken(IdType t) +{ + switch (t) + { + case tok!"||": + case tok!"&&": + case tok!"(": + case tok!",": + case tok!"^^": + case tok!"^=": + case tok!"^": + case tok!"~=": + case tok!"<<=": + case tok!"<<": + case tok!"<=": + case tok!"<>=": + case tok!"<>": + case tok!"<": + case tok!"==": + case tok!"=>": + case tok!"=": + case tok!">=": + case tok!">>=": + case tok!">>>=": + case tok!">>>": + case tok!">>": + case tok!">": + case tok!"|=": + case tok!"|": + case tok!"-=": + case tok!"!<=": + case tok!"!<>=": + case tok!"!<>": + case tok!"!<": + case tok!"!=": + case tok!"!>=": + case tok!"!>": + case tok!"?": + case tok!"/=": + case tok!"/": + case tok!"..": + case tok!"*=": + case tok!"&=": + case tok!"%=": + case tok!"%": + case tok!"+=": + case tok!".": + return true; + default: + return false; + } +} + +int breakCost(IdType t) +{ + switch (t) + { + case tok!"||": + case tok!"&&": + return 21; + case tok!"(": + case tok!",": + return 34; + case tok!"^^": + case tok!"^=": + case tok!"^": + case tok!"~=": + case tok!"<<=": + case tok!"<<": + case tok!"<=": + case tok!"<>=": + case tok!"<>": + case tok!"<": + case tok!"==": + case tok!"=>": + case tok!"=": + case tok!">=": + case tok!">>=": + case tok!">>>=": + case tok!">>>": + case tok!">>": + case tok!">": + case tok!"|=": + case tok!"|": + case tok!"-=": + case tok!"!<=": + case tok!"!<>=": + case tok!"!<>": + case tok!"!<": + case tok!"!=": + case tok!"!>=": + case tok!"!>": + case tok!"?": + case tok!"/=": + case tok!"/": + case tok!"..": + case tok!"*=": + case tok!"&=": + case tok!"%=": + case tok!"%": + case tok!"+=": + return 55; + case tok!".": + return 89; + default: + return 144; + } +} + +struct State +{ + this(size_t[] breaks, const Token[] tokens, int depth, + const FormatterConfig* formatterConfig, int currentLineLength, + int indentLevel) + { + this.breaks = breaks; + this._depth = depth; + import std.algorithm : map, sum; + this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + ((depth - 1) * 50); + int ll = currentLineLength; + size_t breakIndex = 0; + size_t i; + bool s = true; + do + { + immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; + ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); + writeln("ll = ", ll, " i = ", i, " j = ", j); + if (ll > formatterConfig.columnSoftLimit) + { + s = false; + break; + } + i = j; + ll = (indentLevel + 1) * formatterConfig.indentSize; + writeln("ll2 = ", ll); + breakIndex++; + } + while (i + 1 < tokens.length); + this._solved = s; + writeln("breaks = ", breaks, " solved = ", this._solved); + } + + int cost() const @property { return _cost; } + int depth() const @property { return _depth; } + int solved() const @property { return _solved; } + + int opCmp(ref const State other) const + { + if (other.cost < cost) + return -1; + return other.cost > cost; + } + + bool opEquals(ref const State other) const + { + return other.breaks == breaks; + } + + size_t[] breaks; +private: + int _cost; + int _depth; + bool _solved; +} + +size_t[] chooseLineBreakTokens(const Token[] tokens, + const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) +{ + import std.typecons : Tuple, tuple; + import std.container.rbtree : RedBlackTree; + import std.algorithm : map; + + int depth = 0; + auto open = new RedBlackTree!State; + auto closed = new RedBlackTree!(State, "a.breaks < b.breaks"); + open.insert(State(cast(size_t[])[], tokens, depth, formatterConfig, + currentLineLength, indentLevel)); + while (!open.empty) + { + State current = open.front(); + writeln("open = ", open[].map!(a => tuple(a.cost, a.solved))); + open.removeFront(); + closed.insert(current); + if (current.solved) + return current.breaks; + foreach (next; validMoves(tokens, current, formatterConfig, + currentLineLength, indentLevel, depth)) + { + auto r = closed.equalRange(next); + if (!r.empty) + { + if (current.cost > r.front.cost) + continue; + closed.remove(r); + } + open.insert(next); + } + } + writeln("No solution found"); + return open.front().breaks; +} + +State[] validMoves(const Token[] tokens, ref const State current, + const FormatterConfig* formatterConfig, int currentLineLength, + int indentLevel, int depth) +{ + import std.algorithm : sort, canFind; + import std.array:insertInPlace; + + State[] states; + foreach (i, token; tokens) + { + if (!isBreakToken(token.type) || current.breaks.canFind(i)) + continue; + size_t[] breaks; + breaks ~= current.breaks; + breaks ~= i; + sort(breaks); + states ~= State(breaks, tokens, depth + 1, formatterConfig, + currentLineLength, indentLevel); + } + writeln(states); + return states; +} + +unittest +{ + auto sourceCode = q{const Token[] tokens, ref const State current, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth}; + LexerConfig config; + config.stringBehavior = StringBehavior.source; + config.whitespaceBehavior = WhitespaceBehavior.skip; + StringCache cache = StringCache(StringCache.defaultBucketCount); + auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array(); + FormatterConfig formatterConfig; + writeln(chooseLineBreakTokens(tokens, &formatterConfig, 0, 0)); +} From 7dea7ea96213a68de84e48539cb7ce45cbe732b5 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Wed, 18 Feb 2015 18:31:41 -0800 Subject: [PATCH 2/8] moar better A* --- src/dfmt.d | 72 ++++++++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/dfmt.d b/src/dfmt.d index a8ba68b..717a07f 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -1159,42 +1159,52 @@ struct State size_t breakIndex = 0; size_t i; bool s = true; - do + if (breaks.length == 0) { - immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; - ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); - writeln("ll = ", ll, " i = ", i, " j = ", j); - if (ll > formatterConfig.columnSoftLimit) - { - s = false; - break; - } - i = j; - ll = (indentLevel + 1) * formatterConfig.indentSize; - writeln("ll2 = ", ll); - breakIndex++; + _cost = int.max; + s = false; + } + else + { + do + { + immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; + ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); + if (ll > formatterConfig.columnSoftLimit) + { + s = false; + break; + } + i = j; + ll = (indentLevel + 1) * formatterConfig.indentSize; + breakIndex++; + } + while (i + 1 < tokens.length); } - while (i + 1 < tokens.length); this._solved = s; - writeln("breaks = ", breaks, " solved = ", this._solved); } - int cost() const @property { return _cost; } - int depth() const @property { return _depth; } - int solved() const @property { return _solved; } + int cost() const pure nothrow @safe @property { return _cost; } + int depth() const pure nothrow @safe @property { return _depth; } + int solved() const pure nothrow @safe @property { return _solved; } - int opCmp(ref const State other) const + int opCmp(ref const State other) const pure nothrow @safe { - if (other.cost < cost) + if (cost < other.cost || (_solved && !other.solved)) return -1; - return other.cost > cost; + return other.cost > _cost; } - bool opEquals(ref const State other) const + bool opEquals(ref const State other) const pure nothrow @safe { return other.breaks == breaks; } + size_t toHash() const nothrow @safe + { + return typeid(breaks).getHash(&breaks); + } + size_t[] breaks; private: int _cost; @@ -1205,38 +1215,25 @@ private: size_t[] chooseLineBreakTokens(const Token[] tokens, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { - import std.typecons : Tuple, tuple; import std.container.rbtree : RedBlackTree; - import std.algorithm : map; int depth = 0; auto open = new RedBlackTree!State; - auto closed = new RedBlackTree!(State, "a.breaks < b.breaks"); open.insert(State(cast(size_t[])[], tokens, depth, formatterConfig, currentLineLength, indentLevel)); while (!open.empty) { State current = open.front(); - writeln("open = ", open[].map!(a => tuple(a.cost, a.solved))); open.removeFront(); - closed.insert(current); if (current.solved) return current.breaks; foreach (next; validMoves(tokens, current, formatterConfig, currentLineLength, indentLevel, depth)) { - auto r = closed.equalRange(next); - if (!r.empty) - { - if (current.cost > r.front.cost) - continue; - closed.remove(r); - } open.insert(next); } } - writeln("No solution found"); - return open.front().breaks; + return open.empty ? [] : open.front().breaks; } State[] validMoves(const Token[] tokens, ref const State current, @@ -1258,7 +1255,6 @@ State[] validMoves(const Token[] tokens, ref const State current, states ~= State(breaks, tokens, depth + 1, formatterConfig, currentLineLength, indentLevel); } - writeln(states); return states; } @@ -1271,5 +1267,5 @@ unittest StringCache cache = StringCache(StringCache.defaultBucketCount); auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array(); FormatterConfig formatterConfig; - writeln(chooseLineBreakTokens(tokens, &formatterConfig, 0, 0)); + assert ([15] == chooseLineBreakTokens(tokens, &formatterConfig, 0, 0)); } From 32759fafae61dc8b5485bf425b47835b89cac0c1 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Wed, 18 Feb 2015 23:21:12 -0800 Subject: [PATCH 3/8] More fixes to the A* line splitting --- src/dfmt.d | 84 +++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/dfmt.d b/src/dfmt.d index 717a07f..fd0daef 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -171,7 +171,8 @@ private: void formatStep() { - import std.range:assumeSorted; + import std.range : assumeSorted; + import std.algorithm : canFind; assert (index < tokens.length); if (current.type == tok!"comment") @@ -377,6 +378,7 @@ private: case tok!";": tempIndent = 0; writeToken(); + linebreakHints = []; if (index >= tokens.length || current.type != tok!"comment") newline(); if (peekImplementation(tok!"class",0)) @@ -386,24 +388,20 @@ private: writeBraces(); break; case tok!".": - if (currentLineLength + nextTokenLength() >= config.columnHardLimit) - { - pushIndent(); - newline(); - writeToken(); - } - else - writeToken(); + writeToken(); break; case tok!",": - writeToken(); - if (currentLineLength + distanceToNextPreferredBreak() >= config.columnSoftLimit) + if (linebreakHints.canFind(index)) { + writeToken(); pushIndent(); newline(); } else + { + writeToken(); write(" "); + } break; case tok!"=": case tok!">=": @@ -416,7 +414,13 @@ private: case tok!"&=": case tok!"%=": case tok!"+=": - goto case; + write(" "); + writeToken(); + write(" "); + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); + break; case tok!"^^": case tok!"^=": case tok!"^": @@ -447,7 +451,7 @@ private: case tok!"%": case tok!"&&": binary: - if (currentLineLength + distanceToNextPreferredBreak() >= config.columnSoftLimit) + if (linebreakHints.canFind(index)) { pushIndent(); newline(); @@ -600,6 +604,9 @@ private: { writeToken(); depth++; + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); continue; } else if (current.type == tok!")") @@ -639,6 +646,7 @@ private: while (index < tokens.length && depth > 0); popIndent(); tempIndent = t; + linebreakHints = []; } bool peekIsLabel() @@ -719,27 +727,6 @@ private: return tokenLength(tokens[i]); } - int distanceToNextPreferredBreak() pure @safe @nogc - { - size_t i = index + 1; - int l; - loop: while (i < tokens.length) switch (tokens[i].type) - { - case tok!"||": - case tok!"&&": - case tok!";": - case tok!")": - case tok!",": - case tok!"(": - break loop; - default: - l += tokenLength(tokens[i]); - i++; - break; - } - return l; - } - ref current() const @property in { @@ -846,6 +833,8 @@ private: /// Information about the AST ASTInformation* astInformation; + size_t[] linebreakHints; + /// Configuration FormatterConfig* config; } @@ -1157,7 +1146,7 @@ struct State this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + ((depth - 1) * 50); int ll = currentLineLength; size_t breakIndex = 0; - size_t i; + size_t i = 0; bool s = true; if (breaks.length == 0) { @@ -1190,8 +1179,12 @@ struct State int opCmp(ref const State other) const pure nothrow @safe { - if (cost < other.cost || (_solved && !other.solved)) + if (cost < other.cost + || (cost == other.cost && breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) + || (cost == other.cost && _solved && !other.solved)) + { return -1; + } return other.cost > _cost; } @@ -1212,7 +1205,7 @@ private: bool _solved; } -size_t[] chooseLineBreakTokens(const Token[] tokens, +size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { import std.container.rbtree : RedBlackTree; @@ -1226,19 +1219,26 @@ size_t[] chooseLineBreakTokens(const Token[] tokens, State current = open.front(); open.removeFront(); if (current.solved) - return current.breaks; + { + foreach (ref b; current.breaks) + b += index; + return current.breaks; + } foreach (next; validMoves(tokens, current, formatterConfig, currentLineLength, indentLevel, depth)) { open.insert(next); } } - return open.empty ? [] : open.front().breaks; + size_t[] retVal = open.empty ? [] : open.front().breaks; + foreach (ref b; retVal) + b += index; + return retVal; } State[] validMoves(const Token[] tokens, ref const State current, - const FormatterConfig* formatterConfig, int currentLineLength, - int indentLevel, int depth) + const FormatterConfig* formatterConfig, int currentLineLength, + int indentLevel, int depth) { import std.algorithm : sort, canFind; import std.array:insertInPlace; @@ -1267,5 +1267,5 @@ unittest StringCache cache = StringCache(StringCache.defaultBucketCount); auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array(); FormatterConfig formatterConfig; - assert ([15] == chooseLineBreakTokens(tokens, &formatterConfig, 0, 0)); + assert ([15] == chooseLineBreakTokens(0, tokens, &formatterConfig, 0, 0)); } From 96501f74181bef496c611493e937bb846fc8d23a Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 19 Feb 2015 12:43:21 -0800 Subject: [PATCH 4/8] More changes --- src/dfmt.d | 103 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 45 deletions(-) diff --git a/src/dfmt.d b/src/dfmt.d index fd0daef..7c984f3 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -172,7 +172,7 @@ private: void formatStep() { import std.range : assumeSorted; - import std.algorithm : canFind; + import std.algorithm : canFind; assert (index < tokens.length); if (current.type == tok!"comment") @@ -378,7 +378,7 @@ private: case tok!";": tempIndent = 0; writeToken(); - linebreakHints = []; + linebreakHints = []; if (index >= tokens.length || current.type != tok!"comment") newline(); if (peekImplementation(tok!"class",0)) @@ -388,20 +388,20 @@ private: writeBraces(); break; case tok!".": - writeToken(); + writeToken(); break; case tok!",": if (linebreakHints.canFind(index)) { - writeToken(); + writeToken(); pushIndent(); newline(); } else - { - writeToken(); + { + writeToken(); write(" "); - } + } break; case tok!"=": case tok!">=": @@ -414,13 +414,13 @@ private: case tok!"&=": case tok!"%=": case tok!"+=": - write(" "); - writeToken(); - write(" "); - immutable size_t i = expressionEndIndex(); - linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], - config, currentLineLength, indentLevel); - break; + write(" "); + writeToken(); + write(" "); + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); + break; case tok!"^^": case tok!"^=": case tok!"^": @@ -604,9 +604,9 @@ private: { writeToken(); depth++; - immutable size_t i = expressionEndIndex(); - linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], - config, currentLineLength, indentLevel); + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); continue; } else if (current.type == tok!")") @@ -646,7 +646,7 @@ private: while (index < tokens.length && depth > 0); popIndent(); tempIndent = t; - linebreakHints = []; + linebreakHints = []; } bool peekIsLabel() @@ -833,7 +833,7 @@ private: /// Information about the AST ASTInformation* astInformation; - size_t[] linebreakHints; + size_t[] linebreakHints; /// Configuration FormatterConfig* config; @@ -851,19 +851,14 @@ struct FormatterConfig { /// Number of spaces used for indentation uint indentSize = 4; - /// Use tabs or spaces bool useTabs = false; - /// Size of a tab character uint tabSize = 8; - /// Soft line wrap limit uint columnSoftLimit = 80; - /// Hard line wrap limit uint columnHardLimit = 120; - /// Use the One True Brace Style BraceStyle braceStyle = BraceStyle.allman; } @@ -1011,6 +1006,18 @@ int tokenLength(ref const Token t) pure @safe @nogc import std.algorithm : countUntil; switch (t.type) { + case tok!"doubleLiteral": + case tok!"floatLiteral": + case tok!"idoubleLiteral": + case tok!"ifloatLiteral": + case tok!"intLiteral": + case tok!"longLiteral": + case tok!"realLiteral": + case tok!"irealLiteral": + case tok!"uintLiteral": + case tok!"ulongLiteral": + case tok!"characterLiteral": + return cast(int) t.text.length; case tok!"identifier": case tok!"stringLiteral": case tok!"wstringLiteral": @@ -1019,6 +1026,8 @@ int tokenLength(ref const Token t) pure @safe @nogc auto c = cast(int) t.text.countUntil('\n'); if (c == -1) return cast(int) t.text.length; + else + return c; mixin (generateFixedLengthCases()); default: return INVALID_TOKEN_LENGTH; @@ -1072,6 +1081,9 @@ bool isBreakToken(IdType t) case tok!"%": case tok!"+=": case tok!".": + case tok!"~": + case tok!"+": + case tok!"-": return true; default: return false; @@ -1084,10 +1096,10 @@ int breakCost(IdType t) { case tok!"||": case tok!"&&": - return 21; + return 0; case tok!"(": case tok!",": - return 34; + return 10; case tok!"^^": case tok!"^=": case tok!"^": @@ -1125,6 +1137,9 @@ int breakCost(IdType t) case tok!"&=": case tok!"%=": case tok!"%": + case tok!"+": + case tok!"-": + case tok!"~": case tok!"+=": return 55; case tok!".": @@ -1151,7 +1166,7 @@ struct State if (breaks.length == 0) { _cost = int.max; - s = false; + s = tokens.map!(a => tokenLength(a)).sum() < formatterConfig.columnSoftLimit; } else { @@ -1180,11 +1195,11 @@ struct State int opCmp(ref const State other) const pure nothrow @safe { if (cost < other.cost - || (cost == other.cost && breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) - || (cost == other.cost && _solved && !other.solved)) - { + || (cost == other.cost && breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) + || (cost == other.cost && _solved && !other.solved)) + { return -1; - } + } return other.cost > _cost; } @@ -1212,33 +1227,31 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, int depth = 0; auto open = new RedBlackTree!State; - open.insert(State(cast(size_t[])[], tokens, depth, formatterConfig, - currentLineLength, indentLevel)); + open.insert(State(cast(size_t[])[], tokens, depth, formatterConfig, currentLineLength, indentLevel)); while (!open.empty) { State current = open.front(); open.removeFront(); if (current.solved) - { - foreach (ref b; current.breaks) - b += index; - return current.breaks; - } - foreach (next; validMoves(tokens, current, formatterConfig, - currentLineLength, indentLevel, depth)) + { + foreach (ref b; current.breaks) + b += index; + return current.breaks; + } + foreach (next; validMoves(tokens, current, formatterConfig, currentLineLength, indentLevel, depth)) { open.insert(next); } } size_t[] retVal = open.empty ? [] : open.front().breaks; - foreach (ref b; retVal) - b += index; - return retVal; + foreach (ref b; retVal) + b += index; + return retVal; } State[] validMoves(const Token[] tokens, ref const State current, - const FormatterConfig* formatterConfig, int currentLineLength, - int indentLevel, int depth) + const FormatterConfig* formatterConfig, int currentLineLength, + int indentLevel, int depth) { import std.algorithm : sort, canFind; import std.array:insertInPlace; From 30c8134b93152b159d1c0d7494df19f8e9660682 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 19 Feb 2015 16:14:55 -0800 Subject: [PATCH 5/8] Now no longer goes completely out of control with long expressions --- src/dfmt.d | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/dfmt.d b/src/dfmt.d index 7c984f3..719618a 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -391,7 +391,8 @@ private: writeToken(); break; case tok!",": - if (linebreakHints.canFind(index)) + if (linebreakHints.canFind(index) || (linebreakHints.length == 0 + && currentLineLength > config.columnSoftLimit)) { writeToken(); pushIndent(); @@ -402,6 +403,9 @@ private: writeToken(); write(" "); } + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); break; case tok!"=": case tok!">=": @@ -1141,11 +1145,11 @@ int breakCost(IdType t) case tok!"-": case tok!"~": case tok!"+=": - return 55; + return 100; case tok!".": - return 89; + return 200; default: - return 144; + return 1000; } } @@ -1158,7 +1162,7 @@ struct State this.breaks = breaks; this._depth = depth; import std.algorithm : map, sum; - this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + ((depth - 1) * 50); + this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + ((depth - 1) * 200); int ll = currentLineLength; size_t breakIndex = 0; size_t i = 0; @@ -1166,7 +1170,8 @@ struct State if (breaks.length == 0) { _cost = int.max; - s = tokens.map!(a => tokenLength(a)).sum() < formatterConfig.columnSoftLimit; + immutable int l = currentLineLength + tokens.map!(a => tokenLength(a)).sum(); + s = l < formatterConfig.columnSoftLimit; } else { @@ -1195,8 +1200,9 @@ struct State int opCmp(ref const State other) const pure nothrow @safe { if (cost < other.cost - || (cost == other.cost && breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) - || (cost == other.cost && _solved && !other.solved)) + || (cost == other.cost + && ((breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) + || (_solved && !other.solved)))) { return -1; } @@ -1219,42 +1225,42 @@ private: int _depth; bool _solved; } - size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { import std.container.rbtree : RedBlackTree; + import std.algorithm : min; + enum ALGORITHMIC_COMPLEXITY_SUCKS = 20; + immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS); int depth = 0; auto open = new RedBlackTree!State; - open.insert(State(cast(size_t[])[], tokens, depth, formatterConfig, currentLineLength, indentLevel)); + open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth, formatterConfig, + currentLineLength, indentLevel)); while (!open.empty) { State current = open.front(); open.removeFront(); if (current.solved) { - foreach (ref b; current.breaks) - b += index; + current.breaks[] += index; return current.breaks; } - foreach (next; validMoves(tokens, current, formatterConfig, currentLineLength, indentLevel, depth)) + foreach (next; validMoves(tokens[0 .. tokensEnd], current, formatterConfig, + currentLineLength, indentLevel, depth)) { open.insert(next); } } size_t[] retVal = open.empty ? [] : open.front().breaks; - foreach (ref b; retVal) - b += index; + retVal[] += index; return retVal; } - State[] validMoves(const Token[] tokens, ref const State current, - const FormatterConfig* formatterConfig, int currentLineLength, - int indentLevel, int depth) + const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth) { import std.algorithm : sort, canFind; - import std.array:insertInPlace; + import std.array : insertInPlace; State[] states; foreach (i, token; tokens) @@ -1273,6 +1279,7 @@ State[] validMoves(const Token[] tokens, ref const State current, unittest { + import std.string : format; auto sourceCode = q{const Token[] tokens, ref const State current, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth}; LexerConfig config; config.stringBehavior = StringBehavior.source; @@ -1280,5 +1287,6 @@ unittest StringCache cache = StringCache(StringCache.defaultBucketCount); auto tokens = byToken(cast(ubyte[]) sourceCode, config, &cache).array(); FormatterConfig formatterConfig; - assert ([15] == chooseLineBreakTokens(0, tokens, &formatterConfig, 0, 0)); + auto result = chooseLineBreakTokens(0, tokens, &formatterConfig, 0, 0); + assert ([15] == result, "%s".format(result)); } From ebe56f3e5638f88f75b158b9e427ec78cafe2821 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 19 Feb 2015 16:24:21 -0800 Subject: [PATCH 6/8] GC hax --- src/dfmt.d | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dfmt.d b/src/dfmt.d index 719618a..0d4eaf1 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -1230,6 +1230,7 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, { import std.container.rbtree : RedBlackTree; import std.algorithm : min; + import core.memory : GC; enum ALGORITHMIC_COMPLEXITY_SUCKS = 20; immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS); @@ -1237,6 +1238,8 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, auto open = new RedBlackTree!State; open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth, formatterConfig, currentLineLength, indentLevel)); + GC.disable(); + scope(exit) GC.enable(); while (!open.empty) { State current = open.front(); From df676b9fad5a2712816403d6f1a754e3b30ed006 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 19 Feb 2015 17:10:01 -0800 Subject: [PATCH 7/8] Better line wrapping --- src/dfmt.d | 93 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/src/dfmt.d b/src/dfmt.d index 0d4eaf1..6830d78 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -425,6 +425,12 @@ private: linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], config, currentLineLength, indentLevel); break; + case tok!"&&": + case tok!"||": + immutable size_t i = expressionEndIndex(); + linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], + config, currentLineLength, indentLevel); + goto case; case tok!"^^": case tok!"^=": case tok!"^": @@ -440,7 +446,6 @@ private: case tok!">>>": case tok!">>": case tok!">": - case tok!"||": case tok!"|": case tok!"!<=": case tok!"!<>=": @@ -453,7 +458,6 @@ private: case tok!"/": case tok!"..": case tok!"%": - case tok!"&&": binary: if (linebreakHints.canFind(index)) { @@ -986,21 +990,21 @@ string generateFixedLengthCases() "finally", "float", "for", "foreach", "foreach_reverse", "function", "goto", "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is", "lazy", "long", "macro", - "mixin", "module", "new", "nothrow", "null", "out", "override", - "package", "pragma", "private", "protected", "public", "pure", "real", - "ref", "return", "scope", "shared", "short", "static", "struct", "super", + "mixin", "module", "new", "nothrow", "null", "out", "override", "package", + "pragma", "private", "protected", "public", "pure", "real", "ref", + "return", "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized", "template", "this", "throw", "true", "try", - "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", - "union", "unittest", "ushort", "version", "void", "volatile", "wchar", - "while", "with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", - "__gshared", "__LINE__", "__MODULE__", "__parameters", - "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits", - "__vector", "__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/", - "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%", "%=", - "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-", "--", "-=", - ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==", "=>", ">", - ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^", "^=", "^^", - "^^=", "{", "|", "|=", "||", "}", "~", "~="]; + "typedef", "typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", + "unittest", "ushort", "version", "void", "volatile", "wchar", "while", + "with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared", + "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", + "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", + "__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", + "!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "*", + "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", + "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", + "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", + "~="]; return fixedLengthTokens.map!(a => format(`case tok!"%s": return %d;`, a, a.length)).join("\n\t"); } @@ -1162,7 +1166,9 @@ struct State this.breaks = breaks; this._depth = depth; import std.algorithm : map, sum; - this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + ((depth - 1) * 200); + + this._cost = breaks.map!(b => breakCost(tokens[b].type)).sum() + + (depth * 300); int ll = currentLineLength; size_t breakIndex = 0; size_t i = 0; @@ -1177,7 +1183,8 @@ struct State { do { - immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; + immutable size_t j = breakIndex < breaks.length + ? breaks[breakIndex] : tokens.length; ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); if (ll > formatterConfig.columnSoftLimit) { @@ -1193,16 +1200,26 @@ struct State this._solved = s; } - int cost() const pure nothrow @safe @property { return _cost; } - int depth() const pure nothrow @safe @property { return _depth; } - int solved() const pure nothrow @safe @property { return _solved; } + int cost() const pure nothrow @safe @property + { + return _cost; + } + + int depth() const pure nothrow @safe @property + { + return _depth; + } + + int solved() const pure nothrow @safe @property + { + return _solved; + } int opCmp(ref const State other) const pure nothrow @safe { - if (cost < other.cost - || (cost == other.cost - && ((breaks.length && other.breaks.length && breaks[0] > other.breaks[0]) - || (_solved && !other.solved)))) + if (cost < other.cost || (cost == other.cost && ((breaks.length + && other.breaks.length && breaks[0] > other.breaks[0]) || (_solved + && !other.solved)))) { return -1; } @@ -1225,19 +1242,20 @@ private: int _depth; bool _solved; } + size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel) { import std.container.rbtree : RedBlackTree; - import std.algorithm : min; + import std.algorithm : filter, min; import core.memory : GC; enum ALGORITHMIC_COMPLEXITY_SUCKS = 20; immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS); int depth = 0; auto open = new RedBlackTree!State; - open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth, formatterConfig, - currentLineLength, indentLevel)); + open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], depth, + formatterConfig, currentLineLength, indentLevel)); GC.disable(); scope(exit) GC.enable(); while (!open.empty) @@ -1249,18 +1267,25 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, current.breaks[] += index; return current.breaks; } - foreach (next; validMoves(tokens[0 .. tokensEnd], current, formatterConfig, - currentLineLength, indentLevel, depth)) + foreach (next; validMoves(tokens[0 .. tokensEnd], current, + formatterConfig, currentLineLength, indentLevel, depth)) { open.insert(next); } } - size_t[] retVal = open.empty ? [] : open.front().breaks; - retVal[] += index; - return retVal; + if (open.empty) + return isBreakToken(tokens[0].type) ? [index] : []; + foreach (r; open[].filter!(a => a.solved)) + { + r.breaks[] += index; + return r.breaks; + } + assert (false); } + State[] validMoves(const Token[] tokens, ref const State current, - const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, int depth) + const FormatterConfig* formatterConfig, int currentLineLength, int indentLevel, + int depth) { import std.algorithm : sort, canFind; import std.array : insertInPlace; From f0d033eb74df674229cdee3bbc043dc95c30895c Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Thu, 19 Feb 2015 17:50:45 -0800 Subject: [PATCH 8/8] Stop messing up case statements --- libdparse | 2 +- src/dfmt.d | 64 ++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/libdparse b/libdparse index 6ac2d81..c02db8d 160000 --- a/libdparse +++ b/libdparse @@ -1 +1 @@ -Subproject commit 6ac2d819363eacb3cec334e9a6f3c10dfc8bb280 +Subproject commit c02db8d1c8c392f0afe164bc2e318748f76aa5aa diff --git a/src/dfmt.d b/src/dfmt.d index 6830d78..1f9e236 100644 --- a/src/dfmt.d +++ b/src/dfmt.d @@ -364,6 +364,14 @@ private: tempIndent = 0; newline(); } + else if (!assumeSorted(astInformation.caseEndLocations) + .equalRange(current.index).empty) + { + if (!(peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel())) + indentLevel++; + writeToken(); + newline(); + } else { write(" : "); @@ -388,6 +396,12 @@ private: writeBraces(); break; case tok!".": + if (linebreakHints.canFind(index) || (linebreakHints.length == 0 + && currentLineLength + nextTokenLength() > config.columnHardLimit)) + { + pushIndent(); + newline(); + } writeToken(); break; case tok!",": @@ -615,6 +629,12 @@ private: immutable size_t i = expressionEndIndex(); linebreakHints = chooseLineBreakTokens(index, tokens[index .. i], config, currentLineLength, indentLevel); + if (linebreakHints.length == 0 && currentLineLength > config.columnSoftLimit + && current.type != tok!")") + { + pushIndent(); + newline(); + } continue; } else if (current.type == tok!")") @@ -683,22 +703,12 @@ private: writeToken(); write(" "); } - else if (current.type == tok!":") + else if (current.type == tok!":" && peekIs(tok!"..")) { - if (peekIs(tok!"..")) - { - writeToken(); - write(" "); - writeToken(); - write(" "); - } - else - { - if (!(peekIs(tok!"case") || peekIs(tok!"default") || peekIsLabel())) - indentLevel++; - formatStep(); - newline(); - } + writeToken(); + write(" "); + writeToken(); + write(" "); } else { @@ -882,6 +892,7 @@ struct ASTInformation sort(spaceAfterLocations); sort(unaryLocations); sort(attributeDeclarationLines); + sort(caseEndLocations); } /// Locations of end braces for struct bodies @@ -895,6 +906,9 @@ struct ASTInformation /// Lines containing attribute declarations size_t[] attributeDeclarationLines; + + /// Case statement colon locations + size_t[] caseEndLocations; } /// Collects information from the AST that is useful for the formatter @@ -906,6 +920,24 @@ final class FormatVisitor : ASTVisitor this.astInformation = astInformation; } + override void visit(const DefaultStatement defaultStatement) + { + astInformation.caseEndLocations ~= defaultStatement.colonLocation; + defaultStatement.accept(this); + } + + override void visit(const CaseStatement caseStatement) + { + astInformation.caseEndLocations ~= caseStatement.colonLocation; + caseStatement.accept(this); + } + + override void visit(const CaseRangeStatement caseRangeStatement) + { + astInformation.caseEndLocations ~= caseRangeStatement.colonLocation; + caseRangeStatement.accept(this); + } + override void visit(const FunctionBody functionBody) { if (functionBody.blockStatement !is null) @@ -1274,7 +1306,7 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, } } if (open.empty) - return isBreakToken(tokens[0].type) ? [index] : []; + return (tokens.length > 0 && isBreakToken(tokens[0].type)) ? [index] : []; foreach (r; open[].filter!(a => a.solved)) { r.breaks[] += index;