Improve line wrapping algorithm

This commit is contained in:
Hackerpilot 2015-03-22 23:40:23 -07:00
parent 78c8b14ee8
commit 340ef4c0cf
6 changed files with 92 additions and 88 deletions

View File

@ -1,7 +1,7 @@
SRC := $(shell find src -name "*.d") $(shell find libdparse/src -name "*.d") SRC := $(shell find src -name "*.d") $(shell find libdparse/src -name "*.d")
INCLUDE_PATHS := -Ilibdparse/src -Isrc INCLUDE_PATHS := -Ilibdparse/src -Isrc
DMD_FLAGS := -O -inline $(INCLUDE_PATHS) DMD_FLAGS := -O -inline $(INCLUDE_PATHS)
DMD_TEST_FLAGS := -g -w $(INCLUDE_PATHS) DMD_TEST_FLAGS := -unittest -g -w $(INCLUDE_PATHS)
LDC_FLAGS := -g -w -oq $(INCLUDE_PATHS) LDC_FLAGS := -g -w -oq $(INCLUDE_PATHS)
.PHONY: dmd ldc test .PHONY: dmd ldc test
@ -13,7 +13,7 @@ ldc: $(SRC)
-rm -f *.o -rm -f *.o
test: test:
dmd $(DMD_TEST_FLAGS) -unittest $(SRC) -ofbin/dfmt dmd $(DMD_TEST_FLAGS) $(SRC) -ofbin/dfmt
cd tests && ./test.sh cd tests && ./test.sh
bin/dfmt: $(SRC) bin/dfmt: $(SRC)

View File

@ -11,10 +11,12 @@ import dfmt.config;
struct State struct State
{ {
this(size_t[] breaks, const Token[] tokens, immutable short[] depths, int depth, this(uint breaks, const Token[] tokens, immutable short[] depths, int depth,
const Config* config, int currentLineLength, int indentLevel) pure @safe const Config* config, int currentLineLength, int indentLevel) pure @safe
{ {
import std.math : abs; import std.math : abs;
import core.bitop : popcnt, bsf;
import std.algorithm:min;
immutable remainingCharsMultiplier = 40; immutable remainingCharsMultiplier = 40;
immutable newlinePenalty = 800; immutable newlinePenalty = 800;
@ -24,18 +26,18 @@ struct State
import std.algorithm : map, sum; import std.algorithm : map, sum;
this._cost = 0; this._cost = 0;
for (size_t i = 0; i != breaks.length; ++i) for (size_t i = 0; i != uint.sizeof * 8; ++i)
{ {
immutable b = tokens[breaks[i]].type; if (((1 << i) & breaks) == 0)
immutable p = abs(depths[breaks[i]]); continue;
immutable b = tokens[i].type;
immutable p = abs(depths[i]);
immutable bc = breakCost(b) * (p == 0 ? 1 : p * 2); immutable bc = breakCost(b) * (p == 0 ? 1 : p * 2);
this._cost += bc; this._cost += bc;
} }
int ll = currentLineLength; int ll = currentLineLength;
size_t breakIndex = 0;
size_t i = 0;
this._solved = true; this._solved = true;
if (breaks.length == 0) if (breaks == 0)
{ {
immutable int l = currentLineLength + tokens.map!(a => tokenLength(a)).sum(); immutable int l = currentLineLength + tokens.map!(a => tokenLength(a)).sum();
_cost = l; _cost = l;
@ -50,9 +52,12 @@ struct State
} }
else else
{ {
do size_t i = 0;
foreach (_; 0 .. 32)
{ {
immutable size_t j = breakIndex < breaks.length ? breaks[breakIndex] : tokens.length; immutable size_t k = breaks >>> i;
immutable bool b = k == 0;
immutable size_t j = min(i + bsf(k) + 1, tokens.length);
ll += tokens[i .. j].map!(a => tokenLength(a)).sum(); ll += tokens[i .. j].map!(a => tokenLength(a)).sum();
if (ll > config.columnHardLimit) if (ll > config.columnHardLimit)
{ {
@ -63,11 +68,11 @@ struct State
_cost += (ll - config.columnSoftLimit) * remainingCharsMultiplier; _cost += (ll - config.columnSoftLimit) * remainingCharsMultiplier;
i = j; i = j;
ll = indentLevel * config.indentSize; ll = indentLevel * config.indentSize;
breakIndex++; if (b)
break;
} }
while (i + 1 < tokens.length);
} }
this._cost += breaks.length * newlinePenalty; this._cost += popcnt(breaks) * newlinePenalty;
} }
int cost() const pure nothrow @safe @property int cost() const pure nothrow @safe @property
@ -87,8 +92,9 @@ struct State
int opCmp(ref const State other) const pure nothrow @safe int opCmp(ref const State other) const pure nothrow @safe
{ {
if (cost < other.cost || (cost == other.cost && ((breaks.length import core.bitop : bsf , popcnt;
&& other.breaks.length && breaks[0] > other.breaks[0]) || (_solved && !other.solved)))) if (cost < other.cost || (cost == other.cost && ((popcnt(breaks)
&& popcnt(other.breaks) && bsf(breaks) > bsf(other.breaks)) || (_solved && !other.solved))))
{ {
return -1; return -1;
} }
@ -105,7 +111,7 @@ struct State
return typeid(breaks).getHash(&breaks); return typeid(breaks).getHash(&breaks);
} }
size_t[] breaks; uint breaks;
private: private:
int _cost; int _cost;
int _depth; int _depth;
@ -117,12 +123,23 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, immutable sho
{ {
import std.container.rbtree : RedBlackTree; import std.container.rbtree : RedBlackTree;
import std.algorithm : filter, min; import std.algorithm : filter, min;
import core.bitop:popcnt;
enum ALGORITHMIC_COMPLEXITY_SUCKS = 25; static size_t[] genRetVal(uint breaks, size_t index) pure nothrow @safe
{
auto retVal = new size_t[](popcnt(breaks));
size_t j = 0;
foreach (uint i; 0 .. uint.sizeof * 8)
if ((1 << i) & breaks)
retVal[j++] = index + i;
return retVal;
}
enum ALGORITHMIC_COMPLEXITY_SUCKS = uint.sizeof * 8;
immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS); immutable size_t tokensEnd = min(tokens.length, ALGORITHMIC_COMPLEXITY_SUCKS);
int depth = 0; int depth = 0;
auto open = new RedBlackTree!State; auto open = new RedBlackTree!State;
open.insert(State(cast(size_t[])[], tokens[0 .. tokensEnd], open.insert(State(0, tokens[0 .. tokensEnd],
depths[0 .. tokensEnd], depth, config, currentLineLength, indentLevel)); depths[0 .. tokensEnd], depth, config, currentLineLength, indentLevel));
State lowest; State lowest;
while (!open.empty) while (!open.empty)
@ -133,46 +150,31 @@ size_t[] chooseLineBreakTokens(size_t index, const Token[] tokens, immutable sho
open.removeFront(); open.removeFront();
if (current.solved) if (current.solved)
{ {
current.breaks[] += index; return genRetVal(current.breaks, index);
return current.breaks;
}
foreach (next; validMoves(tokens[0 .. tokensEnd], depths[0 .. tokensEnd],
current, config, currentLineLength, indentLevel, depth))
{
open.insert(next);
} }
validMoves!(typeof(open))(open, tokens[0 .. tokensEnd], depths[0 .. tokensEnd],
current.breaks, config, currentLineLength, indentLevel, depth);
} }
if (open.empty) if (open.empty)
{ return genRetVal(lowest.breaks, index);
lowest.breaks[] += index;
return lowest.breaks;
}
foreach (r; open[].filter!(a => a.solved)) foreach (r; open[].filter!(a => a.solved))
{ return genRetVal(r.breaks, index);
r.breaks[] += index;
return r.breaks;
}
assert(false); assert(false);
} }
State[] validMoves(const Token[] tokens, immutable short[] depths, ref const State current, void validMoves(OR)(auto ref OR output, const Token[] tokens, immutable short[] depths,
const Config* config, int currentLineLength, int indentLevel, uint current, const Config* config, int currentLineLength, int indentLevel,
int depth) pure @safe int depth) pure
{ {
import std.algorithm : sort, canFind; import std.algorithm : sort, canFind;
import std.array : insertInPlace; import std.array : insertInPlace;
State[] states;
foreach (i, token; tokens) foreach (i, token; tokens)
{ {
if (!isBreakToken(token.type) || current.breaks.canFind(i)) if (!isBreakToken(token.type) || (((1 << i) & current) != 0))
continue; continue;
size_t[] breaks; immutable uint breaks = current | (1 << i);
breaks ~= current.breaks; output.insert(State(breaks, tokens, depths, depth + 1, config,
breaks ~= i; currentLineLength, indentLevel));
sort(breaks);
states ~= State(breaks, tokens, depths, depth + 1, config,
currentLineLength, indentLevel);
} }
return states;
} }

View File

@ -12,24 +12,24 @@ string generateFixedLengthCases()
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
"char", "class", "const", "continue", "creal", "dchar", "debug", "char", "class", "const", "continue", "creal", "dchar", "debug",
"default", "delegate", "delete", "deprecated", "do", "double", "else", "default", "delegate", "delete", "deprecated", "do", "double", "else",
"enum", "export", "extern", "false", "final", "finally", "float", "for", "enum", "export", "extern", "false", "final", "finally", "float",
"foreach", "foreach_reverse", "function", "goto", "idouble", "if", "for", "foreach", "foreach_reverse", "function", "goto", "idouble",
"ifloat", "immutable", "import", "in", "inout", "int", "interface", "if", "ifloat", "immutable", "import", "in", "inout", "int",
"invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "interface", "invariant", "ireal", "is", "lazy", "long", "macro",
"new", "nothrow", "null", "out", "override", "package", "pragma", "mixin", "module", "new", "nothrow", "null", "out", "override",
"private", "protected", "public", "pure", "real", "ref", "return", "package", "pragma", "private", "protected", "public", "pure", "real",
"scope", "shared", "short", "static", "struct", "super", "switch", "ref", "return", "scope", "shared", "short", "static", "struct",
"synchronized", "template", "this", "throw", "true", "try", "typedef", "super", "switch", "synchronized", "template", "this", "throw", "true",
"typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint",
"unittest", "ushort", "version", "void", "volatile", "wchar", "while", "ulong", "union", "unittest", "ushort", "version", "void", "volatile",
"with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__",
"__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits",
"__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "__vector", "__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/",
"!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%",
"*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-",
"<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==",
">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^",
"~", "~=" "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
]; ];
} }

View File

@ -17,8 +17,9 @@ unittest
}); });
callFunc({ callFunc({
int i = 10; int i = 10;
foo(alpha_longVarName, bravo_longVarName, charlie_longVarName, delta_longVarName, foo(alpha_longVarName, bravo_longVarName, charlie_longVarName,
echo_longVarName, foxtrot_longVarName, golf_longVarName, echo_longVarName); delta_longVarName, echo_longVarName, foxtrot_longVarName,
golf_longVarName, echo_longVarName);
doStuff(withThings, andOtherStuff); doStuff(withThings, andOtherStuff);
return i; return i;
}, more_stuff); }, more_stuff);

View File

@ -11,24 +11,24 @@ string generateFixedLengthCases() {
"break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat", "break", "byte", "case", "cast", "catch", "cdouble", "cent", "cfloat",
"char", "class", "const", "continue", "creal", "dchar", "debug", "char", "class", "const", "continue", "creal", "dchar", "debug",
"default", "delegate", "delete", "deprecated", "do", "double", "else", "default", "delegate", "delete", "deprecated", "do", "double", "else",
"enum", "export", "extern", "false", "final", "finally", "float", "for", "enum", "export", "extern", "false", "final", "finally", "float",
"foreach", "foreach_reverse", "function", "goto", "idouble", "if", "for", "foreach", "foreach_reverse", "function", "goto", "idouble",
"ifloat", "immutable", "import", "in", "inout", "int", "interface", "if", "ifloat", "immutable", "import", "in", "inout", "int",
"invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "interface", "invariant", "ireal", "is", "lazy", "long", "macro",
"new", "nothrow", "null", "out", "override", "package", "pragma", "mixin", "module", "new", "nothrow", "null", "out", "override",
"private", "protected", "public", "pure", "real", "ref", "return", "package", "pragma", "private", "protected", "public", "pure", "real",
"scope", "shared", "short", "static", "struct", "super", "switch", "ref", "return", "scope", "shared", "short", "static", "struct",
"synchronized", "template", "this", "throw", "true", "try", "typedef", "super", "switch", "synchronized", "template", "this", "throw", "true",
"typeid", "typeof", "ubyte", "ucent", "uint", "ulong", "union", "try", "typedef", "typeid", "typeof", "ubyte", "ucent", "uint",
"unittest", "ushort", "version", "void", "volatile", "wchar", "while", "ulong", "union", "unittest", "ushort", "version", "void", "volatile",
"with", "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared", "wchar", "while", "with", "__DATE__", "__EOF__", "__FILE__",
"__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__", "__FUNCTION__", "__gshared", "__LINE__", "__MODULE__", "__parameters",
"__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__", "__PRETTY_FUNCTION__", "__TIME__", "__TIMESTAMP__", "__traits",
"__VERSION__", ",", ".", "..", "...", "/", "/=", "!", "!<", "!<=", "!<>", "__vector", "__VENDOR__", "__VERSION__", ",", ".", "..", "...", "/",
"!<>=", "!=", "!>", "!>=", "$", "%", "%=", "&", "&&", "&=", "(", ")", "/=", "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=", "$", "%",
"*", "*=", "+", "++", "+=", "-", "--", "-=", ":", ";", "<", "<<", "<<=", "%=", "&", "&&", "&=", "(", ")", "*", "*=", "+", "++", "+=", "-",
"<=", "<>", "<>=", "=", "==", "=>", ">", ">=", ">>", ">>=", ">>>", "--", "-=", ":", ";", "<", "<<", "<<=", "<=", "<>", "<>=", "=", "==",
">>>=", "?", "@", "[", "]", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "=>", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "]", "^",
"~", "~=" "^=", "^^", "^^=", "{", "|", "|=", "||", "}", "~", "~="
]; ];
} }

View File

@ -16,8 +16,9 @@ unittest {
}); });
callFunc({ callFunc({
int i = 10; int i = 10;
foo(alpha_longVarName, bravo_longVarName, charlie_longVarName, delta_longVarName, foo(alpha_longVarName, bravo_longVarName, charlie_longVarName,
echo_longVarName, foxtrot_longVarName, golf_longVarName, echo_longVarName); delta_longVarName, echo_longVarName, foxtrot_longVarName,
golf_longVarName, echo_longVarName);
doStuff(withThings, andOtherStuff); doStuff(withThings, andOtherStuff);
return i; return i;
}, more_stuff); }, more_stuff);