Update DustMite

Commits:
* 1f0a0cd dustmite: Fix deprecated use of canFind
* 1035b15 dustmite: Improve progress indicator accuracy
* 556a9ab dsplit: Avoid stack overflow with very long lists
* 6b8f49c Avoid std.string.startsWith, which does pointless UTF-8 decoding
* 8eb5282 dsplit: Don't parse DDoc files as D source
* 403ef2f dsplit: Fix possible range violations in skipSymbol
Vladimir Panteleev 2014-01-07 12:05:01 +00:00
parent f210332a70
commit bea9b9957d
2 changed files with 135 additions and 98 deletions
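
The new startsWith helpers in dsplit.d (the 6b8f49c change) exist because, at the time of this patch, Phobos' startsWith iterated char[] as a range of dchar and decoded UTF-8 along the way. A minimal standalone sketch of the contrast; rawStartsWith and the sample string are illustrative only, not code from the patch:

    import std.algorithm : startsWith;

    // Illustrative only: a byte-level prefix test that never decodes UTF-8.
    bool rawStartsWith(in char[] big, in char[] small)
    {
        return big.length >= small.length && big[0 .. small.length] == small;
    }

    void main()
    {
        auto s = `r"raw string"`;
        assert(s.startsWith(`r"`));      // Phobos startsWith (decoded UTF-8 at the time of this patch)
        assert(rawStartsWith(s, `r"`));  // sketch: compares the bytes directly
    }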

dsplit.d

@@ -4,11 +4,12 @@
module dsplit;
import std.ascii;
import std.algorithm;
import std.array;
import std.file;
import std.path;
import std.string;
import std.ascii;
import std.array;
debug import std.stdio;
class Entity
@@ -106,6 +107,12 @@ void optimize(Entity set)
private:
/// Override std.string nonsense, which does UTF-8 decoding
bool startsWith(in char[] big, in char[] small) { return big.length >= small.length && big[0..small.length] == small; }
bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }
const DExtensions = [".d", ".di"];
Entity loadFile(string name, string path, ParseOptions options)
{
debug writeln("Loading ", path);
@@ -114,110 +121,123 @@ Entity loadFile(string name, string path, ParseOptions options)
result.contents = cast(string)read(path);
if (options.stripComments)
if (extension(path) == ".d" || extension(path) == ".di")
if (DExtensions.canFind(path.extension.toLower))
result.contents = stripDComments(result.contents);
final switch (options.mode)
{
case ParseOptions.Mode.Source:
switch (extension(path))
{
case ".d":
case ".di":
result.children = parseD(result.contents); return result;
if (DExtensions.canFind(path.extension.toLower) && !result.contents.startsWith("Ddoc"))
result.children = parseD(result.contents);
else
// One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
default:
result.children = [new Entity(result.contents, null, null)]; return result;
}
result.children = [new Entity(result.contents, null, null)];
break;
case ParseOptions.Mode.Words:
result.children = parseToWords(result.contents); return result;
result.children = parseToWords(result.contents);
break;
}
return result;
}
class EndOfInput : Throwable { this() { super(null); } }
void lchop(ref string r, size_t n = 1)
{
if (r.length < n)
throw new EndOfInput;
r = r[n..$];
}
string skipSymbol(string s, ref size_t i)
{
auto start = i;
switch (s[i])
{
case '\'':
i++;
if (s[i] == '\\')
i+=2;
while (s[i] != '\'')
i++;
i++;
break;
case '\\':
i+=2;
break;
case '"':
if (i && s[i-1] == 'r')
auto r = s[i..$];
try
switch (r[0])
{
i++;
while (s[i] != '"')
i++;
i++;
}
else
{
i++;
while (s[i] != '"')
{
if (s[i] == '\\')
i+=2;
else
i++;
}
i++;
}
break;
case '`':
i++;
while (s[i] != '`')
i++;
i++;
break;
case '/':
i++;
if (i==s.length)
case '\'':
r.lchop();
if (r.startsWith('\\'))
r.lchop(2);
while (!r.startsWith('\''))
r.lchop();
r.lchop();
break;
else
if (s[i] == '/')
{
while (i < s.length && s[i] != '\r' && s[i] != '\n')
i++;
}
else
if (s[i] == '*')
{
i+=3;
while (s[i-2] != '*' || s[i-1] != '/')
i++;
}
else
if (s[i] == '+')
{
i++;
int commentLevel = 1;
while (commentLevel)
case '\\':
r.lchop(2);
break;
case '"':
r.lchop();
while (!r.startsWith('"'))
{
if (s[i] == '/' && s[i+1]=='+')
commentLevel++, i+=2;
if (r.startsWith('\\'))
r.lchop(2);
else
if (s[i] == '+' && s[i+1]=='/')
commentLevel--, i+=2;
else
i++;
r.lchop();
}
r.lchop();
break;
case 'r':
if (r.startsWith(`r"`))
{
r.lchop(2);
while (!r.startsWith('"'))
r.lchop();
r.lchop();
break;
}
else
goto default;
case '`':
r.lchop();
while (!r.startsWith('`'))
r.lchop();
r.lchop();
break;
case '/':
r.lchop();
if (r.startsWith('/'))
{
while (!r.startsWith('\r') && !r.startsWith('\n'))
r.lchop();
}
else
if (r.startsWith('*'))
{
r.lchop();
while (!r.startsWith("*/"))
r.lchop();
r.lchop(2);
}
else
if (r.startsWith('+'))
{
r.lchop();
int commentLevel = 1;
while (commentLevel)
{
if (r.startsWith("/+"))
commentLevel++, r.lchop(2);
else
if (r.startsWith("+/"))
commentLevel--, r.lchop(2);
else
r.lchop();
}
}
else
r.lchop();
break;
default:
r.lchop();
break;
}
else
i++;
break;
default:
i++;
break;
}
return s[start..i];
catch (EndOfInput)
r = null;
auto len = s.length - i - r.length;
i += len;
return s[i-len..i];
}
/// Moves i forward over first series of EOL characters, or until first non-whitespace character
@@ -364,13 +384,13 @@ string stripDComments(string s)
return result.data;
}
void postProcessD(ref Entity[] entities)
void postProcessD(ref Entity[] entities, int depth=0)
{
for (int i=0; i<entities.length;)
{
// Process comma-separated lists. Nest later items and add a dependency for the comma.
if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == "," && depth < 100)
{
// Put the comma in its own entity, so it can have a dependency
auto comma = new Entity(entities[i].tail);
@@ -404,9 +424,9 @@ void postProcessD(ref Entity[] entities)
{
entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]);
continue;
}
}
postProcessD(entities[i].children);
postProcessD(entities[i].children, depth+1);
i++;
}
}
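
The skipSymbol rewrite above (the 403ef2f change) drops raw s[i] indexing, which could run past the end of the buffer on truncated input, in favour of consuming from the front of a slice: lchop throws EndOfInput rather than hitting a range violation, and the catch at the end treats whatever remains as consumed. A self-contained sketch of the same pattern on a single double-quoted literal; takeStringLiteral is a made-up example, not code from the patch:

    class EndOfInput : Throwable { this() { super(null); } }

    // Consume n characters from the front, signalling cleanly when the input runs out.
    void lchop(ref string r, size_t n = 1)
    {
        if (r.length < n)
            throw new EndOfInput;
        r = r[n .. $];
    }

    bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }

    // Consume a double-quoted literal (with backslash escapes) from the front of s.
    // On truncated input, consume the remainder instead of crashing.
    string takeStringLiteral(string s)
    {
        auto r = s;
        try
        {
            r.lchop();                       // opening quote
            while (!r.startsWith('"'))
                r.lchop(r.startsWith('\\') ? 2 : 1);
            r.lchop();                       // closing quote
        }
        catch (EndOfInput)
            r = null;
        return s[0 .. s.length - r.length];
    }

    unittest
    {
        assert(takeStringLiteral(`"ab\"c" rest`) == `"ab\"c"`);
        assert(takeStringLiteral(`"unterminated`) == `"unterminated`); // no range violation
    }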

dustmite.d

@@ -21,13 +21,16 @@ import std.random;
import dsplit;
// Issue 314 workarounds
alias std.string.join join;
alias std.string.startsWith startsWith;
string dir, resultDir, tester, globalCache;
string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
size_t maxBreadth;
Entity root;
size_t origDescendants;
bool concatPerformed;
int tests; bool foundAnything;
bool noSave, trace;
@@ -75,12 +78,13 @@ struct Reduction
foreach (i, a; address)
{
segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length);
foreach (c; e.children[a+1..$])
foreach (c; e.children[0..a])
progress += c.descendants;
progress++; // account for this node
e = e.children[a];
}
return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", "));
progress += e.descendants;
return format("[%5.1f%%] %s [%s]", (origDescendants-progress) * 100.0 / origDescendants, name, segments.join(binary ? "" : ", "));
}
}
}
@@ -190,6 +194,7 @@ EOS");
optimize(root);
maxBreadth = getMaxBreadth(root);
countDescendants(root);
resetProgress();
assignID(root);
if (dump)
@@ -290,9 +295,15 @@ bool testAddress(size_t[] address)
return false;
}
void resetProgress()
{
origDescendants = root.descendants;
}
void testLevel(int testDepth, out bool tested, out bool changed)
{
tested = changed = false;
resetProgress();
enum MAX_DEPTH = 1024;
size_t[MAX_DEPTH] address;
@@ -328,6 +339,12 @@ void testLevel(int testDepth, out bool tested, out bool changed)
//writefln("Scan results: tested=%s, changed=%s", tested, changed);
}
void startIteration(int iterCount)
{
writefln("############### ITERATION %d ################", iterCount);
resetProgress();
}
/// Keep going deeper until we find a successful reduction.
/// When found, finish tests at current depth and restart from top depth (new iteration).
/// If we reach the bottom (depth with no nodes on it), we're done.
@@ -337,7 +354,7 @@ void reduceCareful()
int iterCount;
do
{
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
bool changed;
int depth = 0;
do
@@ -364,7 +381,7 @@ void reduceLookback()
do
{
iterationChanged = false;
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
int depth = 0, maxDepth = 0;
bool depthTested;
@@ -404,7 +421,7 @@ void reduceInDepth()
do
{
changed = false;
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
enum MAX_DEPTH = 1024;
size_t[MAX_DEPTH] address;
@@ -1000,7 +1017,7 @@ void applyNoRemoveRegex(string[] noRemoveStr)
foreach (f; files)
{
assert(f.isFile);
if (canFind!((a){return !match(f.filename, a).empty;})(noRemove))
if (noRemove.any!(a => !match(f.filename, a).empty))
{
mark(f);
root.noRemove = true;
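
The final hunk is the canFind cleanup listed in the commits above: the predicate-style call canFind!(pred)(noRemove) becomes noRemove.any!(pred). A tiny usage sketch of the two spellings with made-up file names (not DustMite data):

    import std.algorithm : any, canFind, endsWith;

    void main()
    {
        auto files = ["main.d", "util.d", "notes.txt"];

        // Needle search: canFind remains the natural spelling.
        assert(files.canFind("util.d"));

        // Predicate search: the form the patch switches to.
        assert(files.any!(name => !name.endsWith(".d")));
    }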