Update DustMite

Commits:
* 1f0a0cd dustmite: Fix deprecated use of canFind
* 1035b15 dustmite: Improve progress indicator accuracy
* 556a9ab dsplit: Avoid stack overflow with very long lists
* 6b8f49c Avoid std.string.startsWith, which does pointless UTF-8 decoding
* 8eb5282 dsplit: Don't parse DDoc files as D source
* 403ef2f dsplit: Fix possible range violations in skipSymbol
Vladimir Panteleev 2014-01-07 12:05:01 +00:00
parent f210332a70
commit bea9b9957d
2 changed files with 135 additions and 98 deletions
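Background for 6b8f49c and 8eb5282 (a sketch only, not part of the diff below): Phobos range algorithms auto-decode narrow strings into dchar, so std.string.startsWith on a string decodes UTF-8 element by element. That work is pointless for the plain prefix checks dsplit needs, such as the new "Ddoc" test, and decoding can also choke on the invalid UTF-8 that reduced test cases may contain. The helper below is equivalent to the one the diff adds, renamed here to make clear this snippet is illustrative:

// Sketch only: auto-decoding vs. byte-wise prefix checks.
import std.range : ElementType, walkLength;

// Equivalent of the helper this commit adds to dsplit.d (renamed here).
bool startsWithBytes(in char[] big, in char[] small)
{
	return big.length >= small.length && big[0 .. small.length] == small;
}

void main()
{
	// Range algorithms see a string as a range of dchar, i.e. they decode.
	static assert(is(ElementType!string == dchar));
	assert("héllo".length == 6);     // 6 UTF-8 code units
	assert("héllo".walkLength == 5); // 5 decoded code points

	// The byte-wise check compares slices directly, no decoding involved.
	assert(startsWithBytes("Ddoc rest of file", "Ddoc"));
}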

dsplit.d

@@ -4,11 +4,12 @@
 module dsplit;
+import std.ascii;
+import std.algorithm;
+import std.array;
 import std.file;
 import std.path;
 import std.string;
-import std.ascii;
-import std.array;
 debug import std.stdio;
 class Entity
@@ -106,6 +107,12 @@ void optimize(Entity set)
 private:
+/// Override std.string nonsense, which does UTF-8 decoding
+bool startsWith(in char[] big, in char[] small) { return big.length >= small.length && big[0..small.length] == small; }
+bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }
+
+const DExtensions = [".d", ".di"];
+
 Entity loadFile(string name, string path, ParseOptions options)
 {
 	debug writeln("Loading ", path);
@@ -114,110 +121,123 @@ Entity loadFile(string name, string path, ParseOptions options)
 	result.contents = cast(string)read(path);
 	if (options.stripComments)
-		if (extension(path) == ".d" || extension(path) == ".di")
+		if (DExtensions.canFind(path.extension.toLower))
 			result.contents = stripDComments(result.contents);
 	final switch (options.mode)
 	{
 	case ParseOptions.Mode.Source:
-		switch (extension(path))
-		{
-		case ".d":
-		case ".di":
-			result.children = parseD(result.contents); return result;
+		if (DExtensions.canFind(path.extension.toLower) && !result.contents.startsWith("Ddoc"))
+			result.children = parseD(result.contents);
+		else
 		// One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
-		default:
-			result.children = [new Entity(result.contents, null, null)]; return result;
-		}
+			result.children = [new Entity(result.contents, null, null)];
+		break;
 	case ParseOptions.Mode.Words:
-		result.children = parseToWords(result.contents); return result;
+		result.children = parseToWords(result.contents);
+		break;
 	}
+	return result;
+}
+
+class EndOfInput : Throwable { this() { super(null); } }
+
+void lchop(ref string r, size_t n = 1)
+{
+	if (r.length < n)
+		throw new EndOfInput;
+	r = r[n..$];
 }
 string skipSymbol(string s, ref size_t i)
 {
-	auto start = i;
-	switch (s[i])
-	{
-	case '\'':
-		i++;
-		if (s[i] == '\\')
-			i+=2;
-		while (s[i] != '\'')
-			i++;
-		i++;
-		break;
-	case '\\':
-		i+=2;
-		break;
-	case '"':
-		if (i && s[i-1] == 'r')
-		{
-			i++;
-			while (s[i] != '"')
-				i++;
-			i++;
-		}
-		else
-		{
-			i++;
-			while (s[i] != '"')
-			{
-				if (s[i] == '\\')
-					i+=2;
-				else
-					i++;
-			}
-			i++;
-		}
-		break;
-	case '`':
-		i++;
-		while (s[i] != '`')
-			i++;
-		i++;
-		break;
-	case '/':
-		i++;
-		if (i==s.length)
-			break;
-		else
-		if (s[i] == '/')
-		{
-			while (i < s.length && s[i] != '\r' && s[i] != '\n')
-				i++;
-		}
-		else
-		if (s[i] == '*')
-		{
-			i+=3;
-			while (s[i-2] != '*' || s[i-1] != '/')
-				i++;
-		}
-		else
-		if (s[i] == '+')
-		{
-			i++;
-			int commentLevel = 1;
-			while (commentLevel)
-			{
-				if (s[i] == '/' && s[i+1]=='+')
-					commentLevel++, i+=2;
-				else
-				if (s[i] == '+' && s[i+1]=='/')
-					commentLevel--, i+=2;
-				else
-					i++;
-			}
-		}
-		else
-			i++;
-		break;
-	default:
-		i++;
-		break;
-	}
-	return s[start..i];
+	auto r = s[i..$];
+	try
+		switch (r[0])
+		{
+		case '\'':
+			r.lchop();
+			if (r.startsWith('\\'))
+				r.lchop(2);
+			while (!r.startsWith('\''))
+				r.lchop();
+			r.lchop();
+			break;
+		case '\\':
+			r.lchop(2);
+			break;
+		case '"':
+			r.lchop();
+			while (!r.startsWith('"'))
+			{
+				if (r.startsWith('\\'))
+					r.lchop(2);
+				else
+					r.lchop();
+			}
+			r.lchop();
+			break;
+		case 'r':
+			if (r.startsWith(`r"`))
+			{
+				r.lchop(2);
+				while (!r.startsWith('"'))
+					r.lchop();
+				r.lchop();
+				break;
+			}
+			else
+				goto default;
+		case '`':
+			r.lchop();
+			while (!r.startsWith('`'))
+				r.lchop();
+			r.lchop();
+			break;
+		case '/':
+			r.lchop();
+			if (r.startsWith('/'))
+			{
+				while (!r.startsWith('\r') && !r.startsWith('\n'))
+					r.lchop();
+			}
+			else
+			if (r.startsWith('*'))
+			{
+				r.lchop();
+				while (!r.startsWith("*/"))
+					r.lchop();
+				r.lchop(2);
+			}
+			else
+			if (r.startsWith('+'))
+			{
+				r.lchop();
+				int commentLevel = 1;
+				while (commentLevel)
+				{
+					if (r.startsWith("/+"))
+						commentLevel++, r.lchop(2);
+					else
+					if (r.startsWith("+/"))
+						commentLevel--, r.lchop(2);
+					else
+						r.lchop();
+				}
+			}
+			else
+				r.lchop();
+			break;
+		default:
+			r.lchop();
+			break;
+		}
+	catch (EndOfInput)
+		r = null;
+	auto len = s.length - i - r.length;
+	i += len;
+	return s[i-len..i];
 }
 /// Moves i forward over first series of EOL characters, or until first non-whitespace character
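The skipSymbol rewrite above (403ef2f) also changes the failure mode for unterminated tokens: the old index loops could walk past the end of the buffer and trigger a range violation, while lchop throws EndOfInput, which skipSymbol catches to consume the remainder instead. A hypothetical check of that behavior, written as if it were a unittest inside dsplit.d (skipSymbol is module-private):

unittest
{
	size_t i = 0;
	// An unterminated string literal: the old `while (s[i] != '"') i++;`
	// indexed past the end here; the new code catches EndOfInput and
	// simply consumes the rest of the input.
	auto tok = skipSymbol(`"never closed`, i);
	assert(tok == `"never closed`);
	assert(i == tok.length);
}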
@@ -364,13 +384,13 @@ string stripDComments(string s)
 	return result.data;
 }
-void postProcessD(ref Entity[] entities)
+void postProcessD(ref Entity[] entities, int depth=0)
 {
 	for (int i=0; i<entities.length;)
 	{
 		// Process comma-separated lists. Nest later items and add a dependency for the comma.
-		if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
+		if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == "," && depth < 100)
 		{
 			// Put the comma in its own entity, so it can have a dependency
 			auto comma = new Entity(entities[i].tail);
@@ -406,7 +426,7 @@ void postProcessD(ref Entity[] entities)
 			continue;
 		}
-		postProcessD(entities[i].children);
+		postProcessD(entities[i].children, depth+1);
 		i++;
 	}
 }
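The depth parameter added to postProcessD above is the fix for 556a9ab: nesting each later comma-separated item under the previous one makes the tree, and the recursion into it, roughly as deep as the list is long, so a huge array literal could overflow the stack; the depth < 100 guard stops nesting past 100 levels. A hypothetical way to produce such an input (the file name and sizes are made up):

import std.algorithm : map;
import std.array : join;
import std.conv : text;
import std.file : write;
import std.range : iota;

void main()
{
	// A D file with a 50 000-element array literal -- the kind of very long
	// comma-separated list that could previously exhaust the stack.
	auto list = 50_000.iota.map!text.join(", ");
	write("longlist.d", "immutable int[] big = [" ~ list ~ "];\n");
}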

dustmite.d

@@ -21,13 +21,16 @@ import std.random;
 import dsplit;
+// Issue 314 workarounds
 alias std.string.join join;
+alias std.string.startsWith startsWith;
 string dir, resultDir, tester, globalCache;
 string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
 size_t maxBreadth;
 Entity root;
+size_t origDescendants;
 bool concatPerformed;
 int tests; bool foundAnything;
 bool noSave, trace;
@@ -75,12 +78,13 @@ struct Reduction
 			foreach (i, a; address)
 			{
 				segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length);
-				foreach (c; e.children[a+1..$])
+				foreach (c; e.children[0..a])
 					progress += c.descendants;
 				progress++; // account for this node
 				e = e.children[a];
 			}
-			return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", "));
+			progress += e.descendants;
+			return format("[%5.1f%%] %s [%s]", (origDescendants-progress) * 100.0 / origDescendants, name, segments.join(binary ? "" : ", "));
 		}
 	}
 }
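For 1035b15, note the denominator change in the hunk above: origDescendants is a snapshot taken by resetProgress() (added further down in this diff) at the start of each iteration, rather than the live root.descendants, presumably so the percentage is measured against a fixed total for the whole iteration. A sketch of the shape of the new calculation, with made-up numbers (not DustMite code):

double percent(size_t origDescendants, size_t progress)
{
	return (origDescendants - progress) * 100.0 / origDescendants;
}

unittest
{
	// 1000 descendants at the start of the iteration and a progress value
	// of 250 accumulated as in the foreach above: prints as [ 75.0%].
	assert(percent(1000, 250) == 75.0);
}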
@@ -190,6 +194,7 @@ EOS");
 	optimize(root);
 	maxBreadth = getMaxBreadth(root);
 	countDescendants(root);
+	resetProgress();
 	assignID(root);
 	if (dump)
@@ -290,9 +295,15 @@ bool testAddress(size_t[] address)
 	return false;
 }
+void resetProgress()
+{
+	origDescendants = root.descendants;
+}
+
 void testLevel(int testDepth, out bool tested, out bool changed)
 {
 	tested = changed = false;
+	resetProgress();
 	enum MAX_DEPTH = 1024;
 	size_t[MAX_DEPTH] address;
@@ -328,6 +339,12 @@ void testLevel(int testDepth, out bool tested, out bool changed)
 	//writefln("Scan results: tested=%s, changed=%s", tested, changed);
 }
+void startIteration(int iterCount)
+{
+	writefln("############### ITERATION %d ################", iterCount);
+	resetProgress();
+}
+
 /// Keep going deeper until we find a successful reduction.
 /// When found, finish tests at current depth and restart from top depth (new iteration).
 /// If we reach the bottom (depth with no nodes on it), we're done.
@@ -337,7 +354,7 @@ void reduceCareful()
 	int iterCount;
 	do
 	{
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		bool changed;
 		int depth = 0;
 		do
@@ -364,7 +381,7 @@ void reduceLookback()
 	do
 	{
 		iterationChanged = false;
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		int depth = 0, maxDepth = 0;
 		bool depthTested;
@@ -404,7 +421,7 @@ void reduceInDepth()
 	do
 	{
 		changed = false;
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		enum MAX_DEPTH = 1024;
 		size_t[MAX_DEPTH] address;
@@ -1000,7 +1017,7 @@ void applyNoRemoveRegex(string[] noRemoveStr)
 	foreach (f; files)
 	{
 		assert(f.isFile);
-		if (canFind!((a){return !match(f.filename, a).empty;})(noRemove))
+		if (noRemove.any!(a => !match(f.filename, a).empty))
 		{
 			mark(f);
 			root.noRemove = true;