diff --git a/DustMite/dsplit.d b/DustMite/dsplit.d index 2ea7623..c7efd6e 100644 --- a/DustMite/dsplit.d +++ b/DustMite/dsplit.d @@ -4,11 +4,12 @@ module dsplit; +import std.ascii; +import std.algorithm; +import std.array; import std.file; import std.path; import std.string; -import std.ascii; -import std.array; debug import std.stdio; class Entity @@ -106,6 +107,12 @@ void optimize(Entity set) private: +/// Override std.string nonsense, which does UTF-8 decoding +bool startsWith(in char[] big, in char[] small) { return big.length >= small.length && big[0..small.length] == small; } +bool startsWith(in char[] big, char c) { return big.length && big[0] == c; } + +const DExtensions = [".d", ".di"]; + Entity loadFile(string name, string path, ParseOptions options) { debug writeln("Loading ", path); @@ -114,110 +121,123 @@ Entity loadFile(string name, string path, ParseOptions options) result.contents = cast(string)read(path); if (options.stripComments) - if (extension(path) == ".d" || extension(path) == ".di") + if (DExtensions.canFind(path.extension.toLower)) result.contents = stripDComments(result.contents); final switch (options.mode) { case ParseOptions.Mode.Source: - switch (extension(path)) - { - case ".d": - case ".di": - result.children = parseD(result.contents); return result; + if (DExtensions.canFind(path.extension.toLower) && !result.contents.startsWith("Ddoc")) + result.children = parseD(result.contents); + else // One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats - default: - result.children = [new Entity(result.contents, null, null)]; return result; - } + result.children = [new Entity(result.contents, null, null)]; + break; case ParseOptions.Mode.Words: - result.children = parseToWords(result.contents); return result; + result.children = parseToWords(result.contents); + break; } + return result; +} + +class EndOfInput : Throwable { this() { super(null); } } +void lchop(ref string r, size_t n = 1) +{ + if (r.length < n) + throw new EndOfInput; + r = r[n..$]; } string skipSymbol(string s, ref size_t i) { - auto start = i; - switch (s[i]) - { - case '\'': - i++; - if (s[i] == '\\') - i+=2; - while (s[i] != '\'') - i++; - i++; - break; - case '\\': - i+=2; - break; - case '"': - if (i && s[i-1] == 'r') + auto r = s[i..$]; + + try + switch (r[0]) { - i++; - while (s[i] != '"') - i++; - i++; - } - else - { - i++; - while (s[i] != '"') - { - if (s[i] == '\\') - i+=2; - else - i++; - } - i++; - } - break; - case '`': - i++; - while (s[i] != '`') - i++; - i++; - break; - case '/': - i++; - if (i==s.length) + case '\'': + r.lchop(); + if (r.startsWith('\\')) + r.lchop(2); + while (!r.startsWith('\'')) + r.lchop(); + r.lchop(); break; - else - if (s[i] == '/') - { - while (i < s.length && s[i] != '\r' && s[i] != '\n') - i++; - } - else - if (s[i] == '*') - { - i+=3; - while (s[i-2] != '*' || s[i-1] != '/') - i++; - } - else - if (s[i] == '+') - { - i++; - int commentLevel = 1; - while (commentLevel) + case '\\': + r.lchop(2); + break; + case '"': + r.lchop(); + while (!r.startsWith('"')) { - if (s[i] == '/' && s[i+1]=='+') - commentLevel++, i+=2; + if (r.startsWith('\\')) + r.lchop(2); else - if (s[i] == '+' && s[i+1]=='/') - commentLevel--, i+=2; - else - i++; + r.lchop(); } + r.lchop(); + break; + case 'r': + if (r.startsWith(`r"`)) + { + r.lchop(2); + while (!r.startsWith('"')) + r.lchop(); + r.lchop(); + break; + } + else + goto default; + case '`': + r.lchop(); + while (!r.startsWith('`')) + r.lchop(); + r.lchop(); + break; + case '/': + r.lchop(); + if (r.startsWith('/')) + { + while (!r.startsWith('\r') && !r.startsWith('\n')) + r.lchop(); + } + else + if (r.startsWith('*')) + { + r.lchop(); + while (!r.startsWith("*/")) + r.lchop(); + r.lchop(2); + } + else + if (r.startsWith('+')) + { + r.lchop(); + int commentLevel = 1; + while (commentLevel) + { + if (r.startsWith("/+")) + commentLevel++, r.lchop(2); + else + if (r.startsWith("+/")) + commentLevel--, r.lchop(2); + else + r.lchop(); + } + } + else + r.lchop(); + break; + default: + r.lchop(); + break; } - else - i++; - break; - default: - i++; - break; - } - return s[start..i]; + catch (EndOfInput) + r = null; + + auto len = s.length - i - r.length; + i += len; + return s[i-len..i]; } /// Moves i forward over first series of EOL characters, or until first non-whitespace character @@ -364,13 +384,13 @@ string stripDComments(string s) return result.data; } -void postProcessD(ref Entity[] entities) +void postProcessD(ref Entity[] entities, int depth=0) { for (int i=0; i= 1 && entities[i].tail.stripD() == ",") + if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == "," && depth < 100) { // Put the comma in its own entity, so it can have a dependency auto comma = new Entity(entities[i].tail); @@ -404,9 +424,9 @@ void postProcessD(ref Entity[] entities) { entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]); continue; - } + } - postProcessD(entities[i].children); + postProcessD(entities[i].children, depth+1); i++; } } diff --git a/DustMite/dustmite.d b/DustMite/dustmite.d index c96e02e..5e0046d 100644 --- a/DustMite/dustmite.d +++ b/DustMite/dustmite.d @@ -21,13 +21,16 @@ import std.random; import dsplit; +// Issue 314 workarounds alias std.string.join join; +alias std.string.startsWith startsWith; string dir, resultDir, tester, globalCache; string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); } size_t maxBreadth; Entity root; +size_t origDescendants; bool concatPerformed; int tests; bool foundAnything; bool noSave, trace; @@ -75,12 +78,13 @@ struct Reduction foreach (i, a; address) { segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length); - foreach (c; e.children[a+1..$]) + foreach (c; e.children[0..a]) progress += c.descendants; progress++; // account for this node e = e.children[a]; } - return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", ")); + progress += e.descendants; + return format("[%5.1f%%] %s [%s]", (origDescendants-progress) * 100.0 / origDescendants, name, segments.join(binary ? "" : ", ")); } } } @@ -190,6 +194,7 @@ EOS"); optimize(root); maxBreadth = getMaxBreadth(root); countDescendants(root); + resetProgress(); assignID(root); if (dump) @@ -290,9 +295,15 @@ bool testAddress(size_t[] address) return false; } +void resetProgress() +{ + origDescendants = root.descendants; +} + void testLevel(int testDepth, out bool tested, out bool changed) { tested = changed = false; + resetProgress(); enum MAX_DEPTH = 1024; size_t[MAX_DEPTH] address; @@ -328,6 +339,12 @@ void testLevel(int testDepth, out bool tested, out bool changed) //writefln("Scan results: tested=%s, changed=%s", tested, changed); } +void startIteration(int iterCount) +{ + writefln("############### ITERATION %d ################", iterCount); + resetProgress(); +} + /// Keep going deeper until we find a successful reduction. /// When found, finish tests at current depth and restart from top depth (new iteration). /// If we reach the bottom (depth with no nodes on it), we're done. @@ -337,7 +354,7 @@ void reduceCareful() int iterCount; do { - writefln("############### ITERATION %d ################", iterCount++); + startIteration(iterCount++); bool changed; int depth = 0; do @@ -364,7 +381,7 @@ void reduceLookback() do { iterationChanged = false; - writefln("############### ITERATION %d ################", iterCount++); + startIteration(iterCount++); int depth = 0, maxDepth = 0; bool depthTested; @@ -404,7 +421,7 @@ void reduceInDepth() do { changed = false; - writefln("############### ITERATION %d ################", iterCount++); + startIteration(iterCount++); enum MAX_DEPTH = 1024; size_t[MAX_DEPTH] address; @@ -1000,7 +1017,7 @@ void applyNoRemoveRegex(string[] noRemoveStr) foreach (f; files) { assert(f.isFile); - if (canFind!((a){return !match(f.filename, a).empty;})(noRemove)) + if (noRemove.any!(a => !match(f.filename, a).empty)) { mark(f); root.noRemove = true;