diff --git a/DustMite/README b/DustMite/README new file mode 100644 index 0000000..bc20d15 --- /dev/null +++ b/DustMite/README @@ -0,0 +1,6 @@ +This is DustMite, a D source code minimization tool. +For documentation, see the GitHub wiki: +https://github.com/CyberShadow/DustMite/wiki + +DustMite was created by Vladimir Panteleev +and is released into the Public Domain. diff --git a/DustMite/dsplit.d b/DustMite/dsplit.d new file mode 100644 index 0000000..9bba0d3 --- /dev/null +++ b/DustMite/dsplit.d @@ -0,0 +1,528 @@ +/// Very simplistic D source code "parser" +/// Written by Vladimir Panteleev +/// Released into the Public Domain + +module dsplit; + +import std.file; +import std.path; +import std.string; +import std.ascii; +import std.array; +debug import std.stdio; + +class Entity +{ + string head; + Entity[] children; + string tail; + + string filename, contents; + @property bool isFile() { return filename != ""; } + + bool isPair; /// internal hint + bool noRemove; /// don't try removing this entity (children OK) + + bool removed; /// For dangling dependencies + Entity[] dependencies; + + int id; /// For diagnostics + size_t descendants; /// For progress display + + this(string head = null, Entity[] children = null, string tail = null, string filename = null, bool isPair = false) + { + this.head = head; + this.children = children; + this.tail = tail; + this.filename = filename; + this.isPair = isPair; + } +} + +struct ParseOptions +{ + enum Mode { Source, Words } + + bool stripComments; + Mode mode; +} + +Entity loadFiles(ref string path, ParseOptions options) +{ + if (isFile(path)) + { + auto filePath = path; + path = stripExtension(path); + return loadFile(baseName(filePath).replace(`\`, `/`), filePath, options); + } + else + { + auto set = new Entity(); + foreach (string entry; dirEntries(path, SpanMode.breadth)) + if (isFile(entry)) + { + assert(entry.startsWith(path)); + auto name = entry[path.length+1..$]; + set.children ~= loadFile(name, entry, options); + } + return set; + } +} + +enum BIN_SIZE = 2; + +void optimize(Entity set) +{ + static void group(ref Entity[] set, size_t start, size_t end) + { + //set = set[0..start] ~ [new Entity(removable, set[start..end])] ~ set[end..$]; + set.replaceInPlace(start, end, [new Entity(null, set[start..end].dup, null)]); + } + + static void clusterBy(ref Entity[] set, size_t binSize) + { + while (set.length > binSize) + { + auto size = set.length >= binSize*2 ? 
binSize : (set.length+1) / 2; + //auto size = binSize; + + auto bins = set.length/size; + if (set.length % size > 1) + group(set, bins*size, set.length); + foreach_reverse (i; 0..bins) + group(set, i*size, (i+1)*size); + } + } + + static void doOptimize(Entity e) + { + foreach (c; e.children) + doOptimize(c); + clusterBy(e.children, BIN_SIZE); + } + + doOptimize(set); +} + +private: + +Entity loadFile(string name, string path, ParseOptions options) +{ + debug writeln("Loading ", path); + auto result = new Entity(); + result.filename = name.replace(`\`, `/`); + result.contents = cast(string)read(path); + + if (options.stripComments) + if (extension(path) == ".d" || extension(path) == ".di") + result.contents = stripDComments(result.contents); + + final switch (options.mode) + { + case ParseOptions.Mode.Source: + switch (extension(path)) + { + case ".d": + case ".di": + result.children = parseD(result.contents); return result; + // One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats + default: + result.children = [new Entity(result.contents, null, null)]; return result; + } + case ParseOptions.Mode.Words: + result.children = parseToWords(result.contents); return result; + } +} + +string skipSymbol(string s, ref size_t i) +{ + auto start = i; + switch (s[i]) + { + case '\'': + i++; + if (s[i] == '\\') + i+=2; + while (s[i] != '\'') + i++; + i++; + break; + case '\\': + i+=2; + break; + case '"': + if (i && s[i-1] == 'r') + { + i++; + while (s[i] != '"') + i++; + i++; + } + else + { + i++; + while (s[i] != '"') + { + if (s[i] == '\\') + i+=2; + else + i++; + } + i++; + } + break; + case '`': + i++; + while (s[i] != '`') + i++; + i++; + break; + case '/': + i++; + if (i==s.length) + break; + else + if (s[i] == '/') + { + while (i < s.length && s[i] != '\r' && s[i] != '\n') + i++; + } + else + if (s[i] == '*') + { + i+=3; + while (s[i-2] != '*' || s[i-1] != '/') + i++; + } + else + if (s[i] == '+') + { + i++; + int commentLevel = 1; + while (commentLevel) + { + if (s[i] == '/' && s[i+1]=='+') + commentLevel++, i+=2; + else + if (s[i] == '+' && s[i+1]=='/') + commentLevel--, i+=2; + else + i++; + } + } + else + i++; + break; + default: + i++; + break; + } + return s[start..i]; +} + +/// Moves i forward over first series of EOL characters, or until first non-whitespace character +void skipToEOL(string s, ref size_t i) +{ + while (i < s.length) + { + if (s[i] == '\r' || s[i] == '\n') + { + while (i < s.length && (s[i] == '\r' || s[i] == '\n')) + i++; + return; + } + else + if (isWhite(s[i])) + i++; + else + if (s[i..$].startsWith("//")) + skipSymbol(s, i); + else + break; + } +} + +/// Moves i backwards to the beginning of the current line, but not any further than start +void backToEOL(string s, ref size_t i, size_t start) +{ + while (i>start && isWhite(s[i-1]) && s[i-1] != '\n') + i--; +} + +Entity[] parseD(string s) +{ + size_t i = 0; + size_t start; + string innerTail; + + Entity[] parseScope(char end) + { + // Here be dragons. 
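+		// Orientation for the machinery below (descriptive note): one queue of
+		// entities is kept per "splitter" level: level 0 splits on {...} blocks
+		// and ';', level 1 on (...), level 2 on [...], level 3 on ',', and
+		// level 4 on spaces. terminateLevel(n) recursively closes all deeper
+		// levels, nesting their results, and returns what accumulated at level n.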
+
+		enum MAX_SPLITTER_LEVELS = 5;
+		struct DSplitter { char open, close, sep; }
+		static const DSplitter[MAX_SPLITTER_LEVELS] splitters = [{'{','}',';'}, {'(',')'}, {'[',']'}, {sep:','}, {sep:' '}];
+
+		Entity[][MAX_SPLITTER_LEVELS] splitterQueue;
+
+		Entity[] terminateLevel(int level)
+		{
+			if (level == MAX_SPLITTER_LEVELS)
+			{
+				auto text = s[start..i];
+				start = i;
+				return splitText(text);
+			}
+			else
+			{
+				auto next = terminateLevel(level+1);
+				if (next.length <= 1)
+					splitterQueue[level] ~= next;
+				else
+					splitterQueue[level] ~= new Entity(null, next, null);
+				auto r = splitterQueue[level];
+				splitterQueue[level] = null;
+				return r;
+			}
+		}
+
+		string terminateText()
+		{
+			auto r = s[start..i];
+			start = i;
+			return r;
+		}
+
+	characterLoop:
+		while (i < s.length)
+		{
+			char c = s[i];
+			foreach (int level, info; splitters)
+				if (info.sep && c == info.sep)
+				{
+					auto children = terminateLevel(level+1);
+					assert(i == start);
+					i++; skipToEOL(s, i);
+					splitterQueue[level] ~= new Entity(null, children, terminateText());
+					continue characterLoop;
+				}
+				else
+				if (info.open && c == info.open)
+				{
+					auto openPos = i;
+					backToEOL(s, i, start);
+					auto pairHead = terminateLevel(level+1);
+
+					i = openPos+1; skipToEOL(s, i);
+					auto startSequence = terminateText();
+					auto bodyContents = parseScope(info.close);
+
+					auto pairBody = new Entity(startSequence, bodyContents, innerTail);
+
+					if (pairHead.length == 0)
+						splitterQueue[level] ~= pairBody;
+					else
+					if (pairHead.length == 1)
+						splitterQueue[level] ~= new Entity(null, pairHead ~ pairBody, null, null, true);
+					else
+						splitterQueue[level] ~= new Entity(null, [new Entity(null, pairHead, null), pairBody], null, null, true);
+					continue characterLoop;
+				}
+
+			if (end && c == end)
+			{
+				auto closePos = i;
+				backToEOL(s, i, start);
+				auto result = terminateLevel(0);
+				i = closePos+1; skipToEOL(s, i);
+				innerTail = terminateText();
+				return result;
+			}
+			else
+				skipSymbol(s, i);
+		}
+
+		innerTail = null;
+		return terminateLevel(0);
+	}
+
+	auto result = parseScope(0);
+	postProcessD(result);
+	return result;
+}
+
+string stripDComments(string s)
+{
+	auto result = appender!string();
+	size_t i = 0;
+	while (i < s.length)
+	{
+		auto sym = skipSymbol(s, i);
+		if (!sym.startsWithComment())
+			result.put(sym);
+	}
+	return result.data;
+}
+
+void postProcessD(ref Entity[] entities)
+{
+	for (int i=0; i<entities.length;)
+	{
+		// Split off terminating commas into their own entities, so that
+		// they are removed together with either neighboring list element
+		// (via the dependencies added below)
+		if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
+		{
+			auto comma = new Entity(entities[i].tail);
+			entities[i].children ~= comma;
+			entities[i].tail = null;
+			comma.dependencies ~= [entities[i].children[$-2], getHeadEntity(entities[i+1])];
+		}
+
+		// Group together consecutive entities which might represent a single language construct
+		// There is no penalty for false positives, so accuracy is not very important
+
+		if (i+2 <= entities.length && entities.length > 2 && (
+			(getHeadText(entities[i]).startsWithWord("do") && getHeadText(entities[i+1]).isWord("while"))
+		 || (getHeadText(entities[i]).startsWithWord("try") && getHeadText(entities[i+1]).startsWithWord("catch"))
+		 || (getHeadText(entities[i]).startsWithWord("try") && getHeadText(entities[i+1]).startsWithWord("finally"))
+		 || (getHeadText(entities[i+1]).isWord("in"))
+		 || (getHeadText(entities[i+1]).isWord("out"))
+		 || (getHeadText(entities[i+1]).isWord("body"))
+		))
+		{
+			entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]);
+			continue;
+		}
+
+		postProcessD(entities[i].children);
+		i++;
+	}
+}
+
+const bool[string] wordsToSplit;
+static this() { wordsToSplit = ["else":true]; }
+
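+// Illustrative sanity check (an addition, not part of the original tool):
+// a coarse look at how parseD decomposes a tiny function. The exact tree
+// shape produced by the splitter levels is an implementation detail, so
+// only rough structural properties are asserted here.
+unittest
+{
+	static size_t count(Entity[] es)
+	{
+		size_t n;
+		foreach (e; es)
+			n += 1 + count(e.children);
+		return n;
+	}
+
+	auto entities = parseD("void f()\n{\n\ta;\n\tb;\n}\n");
+	assert(entities.length >= 1); // at least one top-level construct
+	assert(count(entities) > 2);  // the function was split into head + statements
+}
+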
+Entity[] splitText(string s)
+{
+	Entity[] result;
+	while (s.length)
+	{
+		auto word = firstWord(s);
+		if (word in wordsToSplit)
+		{
+			size_t p = word.ptr + word.length - s.ptr;
+			skipToEOL(s, p);
+			result ~= new Entity(s[0..p], null, null);
+			s = s[p..$];
+		}
+		else
+		{
+			result ~= new Entity(s, null, null);
+			s = null;
+		}
+	}
+
+	return result;
+}
+
+string stripD(string s)
+{
+	size_t i=0;
+	size_t start=s.length, end=s.length;
+	while (i < s.length)
+	{
+		if (s[i..$].startsWithComment())
+			skipSymbol(s, i);
+		else
+		if (!isWhite(s[i]))
+		{
+			if (start > i)
+				start = i;
+			skipSymbol(s, i);
+			end = i;
+		}
+		else
+			i++;
+	}
+	return s[start..end];
+}
+
+string firstWord(string s)
+{
+	size_t i = 0;
+	s = stripD(s);
+	while (i < s.length && !isWhite(s[i]))
+		i++;
+	return s[0..i];
+}
+
+bool startsWithComment(string s)
+{
+	return s.startsWith("//") || s.startsWith("/*") || s.startsWith("/+");
+}
+
+bool isWordChar(char c)
+{
+	return isAlphaNum(c) || c == '_';
+}
+
+bool startsWithWord(string s, string word)
+{
+	s = stripD(s);
+	return s.startsWith(word) && (s.length == word.length || !isWordChar(s[word.length]));
+}
+
+bool isWord(string s, string word)
+{
+	return stripD(s) == word;
+}
+
+Entity getHeadEntity(Entity e)
+{
+	if (e.head.length)
+		return e;
+	foreach (c; e.children)
+	{
+		auto r = getHeadEntity(c);
+		if (r !is null)
+			return r;
+	}
+	if (e.tail.length)
+		return e;
+	return null;
+}
+
+string getHeadText(Entity e)
+{
+	e = getHeadEntity(e);
+	return e is null ? null : e.head.length ? e.head : e.tail;
+}
+
+public:
+
+/// Split text into words (head) and the non-word characters that follow them (tail)
+Entity[] parseToWords(string text)
+{
+	Entity[] result;
+	size_t i = 0;
+	while (i < text.length)
+	{
+		auto wordStart = i;
+		while (i < text.length && isWordChar(text[i]))
+			i++;
+		auto wordEnd = i;
+		while (i < text.length && !isWordChar(text[i]))
+			i++;
+		result ~= new Entity(text[wordStart..wordEnd], null, text[wordEnd..i]);
+	}
+	return result;
+}
diff --git a/DustMite/dustmite.d b/DustMite/dustmite.d
new file mode 100644
--- /dev/null
+++ b/DustMite/dustmite.d
+/// DustMite, a D source code minimization tool
+/// Written by Vladimir Panteleev
+/// Released into the Public Domain
+
+module dustmite;
+
+import std.stdio;
+import std.file;
+import std.path;
+import std.string;
+import std.getopt;
+import std.array;
+import std.process;
+import std.algorithm;
+import std.exception;
+import std.datetime;
+import std.regex;
+import std.conv;
+import std.ascii;
+import std.random;
+
+import dsplit;
+
+alias std.string.join join;
+
+string dir, resultDir, tester, globalCache;
+size_t maxBreadth;
+Entity root;
+int tests; bool foundAnything;
+bool noSave;
+
+struct Times { StopWatch total, load, testSave, resultSave, test, clean, cacheHash, globalCache, misc; }
+Times times;
+static this() { times.total.start(); times.misc.start(); }
+void measure(string what)(void delegate() p)
+{
+	times.misc.stop(); mixin("times."~what~".start();");
+	p();
+	mixin("times."~what~".stop();"); times.misc.start();
+}
+
+struct Reduction
+{
+	enum Type { None, Remove, Unwrap, ReplaceWord }
+	Type type;
+
+	// Remove / Unwrap
+	size_t[] address;
+	Entity target;
+
+	// ReplaceWord
+	string from, to;
+	size_t index, total;
+
+	string toString()
+	{
+		string name = .to!string(type);
+
+		final switch (type)
+		{
+			case Reduction.Type.None:
+				return name;
+			case Reduction.Type.ReplaceWord:
+				return format(`%s [%d/%d: %s -> %s]`, name, index+1, total, from, to);
+			case Reduction.Type.Remove:
+			case Reduction.Type.Unwrap:
+				string[] segments = new string[address.length];
+				Entity e = root;
+				size_t progress;
+				bool binary = maxBreadth == 2;
+				foreach (i, a; address)
+				{
+					segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length);
+					foreach (c; e.children[a+1..$])
+						progress += c.descendants;
+					progress++; // account for this node
+					e = e.children[a];
+				}
+				return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", "));
+		}
+	}
+}
+
+auto nullReduction = Reduction(Reduction.Type.None);
+
+int main(string[] args)
+{
+	bool force, dump, showTimes, stripComments, obfuscate, keepLength, showHelp, noOptimize;
+	string coverageDir;
+	string[] noRemoveStr;
+
+	getopt(args,
+		"force", &force,
+		"noremove", &noRemoveStr,
+		"strip-comments", &stripComments,
+		"coverage", &coverageDir,
+		"obfuscate", &obfuscate,
+		"keep-length", &keepLength,
+		"dump", &dump,
+		"times", &showTimes,
+		"cache", &globalCache, // for research
+		"nosave|no-save", &noSave, // for research
+		"no-optimize", &noOptimize, // for research
+		"h|help", &showHelp
+	);
+
+	if (showHelp || args.length == 1 || args.length>3)
+	{
+		stderr.writef(q"EOS
+Usage: %s [OPTION]... PATH TESTER
+PATH should be a directory containing a clean copy of the file-set to reduce.
+A file path can also be specified. NAME.EXT will be treated like NAME/NAME.EXT.
+TESTER should be a shell command which returns 0 for a correct reduction,
+and a non-zero exit status otherwise.
+Supported options:
+  --force            Force reduction of unusual files
+  --noremove REGEXP  Do not reduce blocks containing REGEXP
+                       (may be used multiple times)
+  --strip-comments   Attempt to remove comments from source code
+  --coverage DIR     Load .lst files corresponding to source files from DIR
+  --obfuscate        Instead of reducing, obfuscate the input by replacing
+                       words with random substitutions
+  --keep-length      Preserve word length when obfuscating
+EOS", args[0]);
+
+		if (!showHelp)
+		{
+			stderr.write(q"EOS
+  --help             Show this message and some less interesting options
+EOS");
+		}
+		else
+		{
+			stderr.write(q"EOS
+  --help             Show this message
+Less interesting options:
+  --dump             Dump parsed tree to DIR.dump file
+  --times            Display a verbose breakdown of time spent
+  --cache DIR        Use DIR as persistent disk cache
+                       (in addition to memory cache)
+  --no-save          Disable saving in-progress results
+  --no-optimize      Disable tree optimization step
+                       (may be useful with --dump)
+EOS");
+		}
+		stderr.write(q"EOS
+
+Full documentation can be found on the GitHub wiki:
+  https://github.com/CyberShadow/DustMite/wiki
+EOS");
+		return showHelp ? 0 : 64; // EX_USAGE
+	}
+
+	enforce(!(stripComments && coverageDir), "Sorry, --strip-comments is not compatible with --coverage");
+
+	dir = args[1];
+	if (isDirSeparator(dir[$-1]))
+		dir = dir[0..$-1];
+
+	if (args.length>=3)
+		tester = args[2];
+
+	bool isDotName(string fn) { return fn.startsWith(".") && !(fn=="." || fn==".."); }
+
+	if (!force && isDir(dir))
+		foreach (string path; dirEntries(dir, SpanMode.breadth))
+			if (isDotName(baseName(path)) || isDotName(baseName(dirName(path))) || extension(path)==".o" || extension(path)==".obj" || extension(path)==".exe")
+			{
+				stderr.writefln("Suspicious file found: %s\nYou should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nre-run dustmite with the --force option.", path);
+				return 1;
+			}
+
+	ParseOptions parseOptions;
+	parseOptions.stripComments = stripComments;
+	parseOptions.mode = obfuscate ? ParseOptions.Mode.Words : ParseOptions.Mode.Source;
+	measure!"load"({root = loadFiles(dir, parseOptions);});
+	enforce(root.children.length, "No files in specified directory");
+
+	applyNoRemoveMagic();
+	applyNoRemoveRegex(noRemoveStr);
+	if (coverageDir)
+		loadCoverage(coverageDir);
+	if (!obfuscate && !noOptimize)
+		optimize(root);
+	maxBreadth = getMaxBreadth(root);
+	countDescendants(root);
+
+	if (dump)
+		dumpSet(dir ~ ".dump");
+
+	if (tester is null)
+	{
+		writeln("No tester specified, exiting");
+		return 0;
+	}
+
+	resultDir = dir ~ ".reduced";
+	enforce(!exists(resultDir), "Result directory already exists");
+
+	if (!test(nullReduction))
+		throw new Exception("Initial test fails");
+
+	foundAnything = false;
+	if (obfuscate)
+		.obfuscate(keepLength);
+	else
+		reduce();
+
+	auto duration = cast(Duration)times.total.peek();
+	duration = dur!"msecs"(duration.total!"msecs"); // truncate anything below ms, users aren't interested in that
+	if (foundAnything)
+	{
+		if (noSave)
+			measure!"resultSave"({safeSave(resultDir);});
+		writefln("Done in %s tests and %s; reduced version is in %s", tests, duration, resultDir);
+	}
+	else
+		writefln("Done in %s tests and %s; no reductions found", tests, duration);
+
+	if (showTimes)
+		foreach (i, t; times.tupleof)
+			writefln("%s: %s", times.tupleof[i].stringof, cast(Duration)times.tupleof[i].peek());
+
+	return 0;
+}
+
+size_t getMaxBreadth(Entity e)
+{
+	size_t breadth = e.children.length;
+	foreach (child; e.children)
+	{
+		auto childBreadth = getMaxBreadth(child);
+		if (breadth < childBreadth)
+			breadth = childBreadth;
+	}
+	return breadth;
+}
+
+size_t countDescendants(Entity e)
+{
+	size_t n = 1;
+	foreach (c; e.children)
+		n += countDescendants(c);
+	return e.descendants = n;
+}
+
+size_t checkDescendants(Entity e)
+{
+	size_t n = 1;
+	foreach (c; e.children)
+		n += checkDescendants(c); // recurse into the check itself, so every node is verified (countDescendants would silently repair the counts)
+	assert(e.descendants == n);
+	return n;
+}
+
+/// Try reductions at address. Edit set, save result and return true on successful reduction.
+bool testAddress(size_t[] address)
+{
+	auto e = entityAt(address);
+
+	if (tryReduction(Reduction(Reduction.Type.Remove, address, e)))
+		return true;
+	else
+	if (e.head.length && e.tail.length && tryReduction(Reduction(Reduction.Type.Unwrap, address, e)))
+		return true;
+	else
+		return false;
+}
+
+void testLevel(int testDepth, out bool tested, out bool changed)
+{
+	tested = changed = false;
+
+	enum MAX_DEPTH = 1024;
+	size_t[MAX_DEPTH] address;
+
+	void scan(Entity e, int depth)
+	{
+		if (depth < testDepth)
+		{
+			// recurse
+			foreach_reverse (i, c; e.children)
+			{
+				address[depth] = i;
+				scan(c, depth+1);
+			}
+		}
+		else
+		if (e.noRemove)
+		{
+			// skip, but don't stop going deeper
+			tested = true;
+		}
+		else
+		{
+			// test
+			tested = true;
+			if (testAddress(address[0..depth]))
+				changed = true;
+		}
+	}
+
+	scan(root, 0);
+
+	//writefln("Scan results: tested=%s, changed=%s", tested, changed);
+}
+
+/// Keep going deeper until we find a successful reduction.
+/// When found, finish tests at current depth and restart from top depth (new iteration).
+/// If we reach the bottom (depth with no nodes on it), we're done.
+void reduceCareful() +{ + bool tested; + int iterCount; + do + { + writefln("############### ITERATION %d ################", iterCount++); + bool changed; + int depth = 0; + do + { + writefln("============= Depth %d =============", depth); + + testLevel(depth, tested, changed); + + depth++; + } while (tested && !changed); // go deeper while we found something to test, but no results + } while (tested); // stop when we didn't find anything to test +} + +/// Keep going deeper until we find a successful reduction. +/// When found, go up a depth level. +/// Keep going up while we find new reductions. Repeat topmost depth level as necessary. +/// Once no new reductions are found at higher depths, jump to the next unvisited depth in this iteration. +/// If we reach the bottom (depth with no nodes on it), start a new iteration. +/// If we finish an iteration without finding any reductions, we're done. +void reduceLookback() +{ + bool iterationChanged; + int iterCount; + do + { + iterationChanged = false; + writefln("############### ITERATION %d ################", iterCount++); + + int depth = 0, maxDepth = 0; + bool depthTested; + + do + { + writefln("============= Depth %d =============", depth); + bool depthChanged; + + testLevel(depth, depthTested, depthChanged); + + if (depthChanged) + { + iterationChanged = true; + depth--; + if (depth < 0) + depth = 0; + } + else + { + maxDepth++; + depth = maxDepth; + } + } while (depthTested); // keep going up/down while we found something to test + } while (iterationChanged); // stop when we couldn't reduce anything this iteration +} + +/// Look at every entity in the tree. +/// If we can reduce this entity, continue looking at its siblings. +/// Otherwise, recurse and look at its children. +/// End an iteration once we looked at an entire tree. +/// If we finish an iteration without finding any reductions, we're done. +void reduceInDepth() +{ + bool changed; + int iterCount; + do + { + changed = false; + writefln("############### ITERATION %d ################", iterCount++); + + enum MAX_DEPTH = 1024; + size_t[MAX_DEPTH] address; + + void scan(Entity e, int depth) + { + if (e.noRemove) + { + // skip, but don't stop going deeper + } + else + { + // test + if (testAddress(address[0..depth])) + { + changed = true; + return; + } + } + + // recurse + foreach_reverse (i, c; e.children) + { + address[depth] = i; + scan(c, depth+1); + } + } + + scan(root, 0); + } while (changed && root.children.length); // stop when we couldn't reduce anything this iteration +} + +void reduce() +{ + //reduceCareful(); + //reduceLookback(); + reduceInDepth(); +} + +void obfuscate(bool keepLength) +{ + bool[string] wordSet; + string[] words; // preserve file order + + foreach (f; root.children) + { + foreach (entity; parseToWords(f.filename) ~ f.children) + if (entity.head.length && !isDigit(entity.head[0])) + if (entity.head !in wordSet) + { + wordSet[entity.head] = true; + words ~= entity.head; + } + } + + string idgen(size_t length) + { + static const first = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // use caps to avoid collisions with reserved keywords + static const other = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; + + if (keepLength) + { + auto result = new char[length]; + foreach (i, ref c; result) + c = (i==0 ? 
first : other)[uniform(0, $)]; + + return assumeUnique(result); + } + else + { + static int n; + int index = n++; + + string result; + result ~= first[index % $]; + index /= first.length; + + while (index) + result ~= other[index % $], + index /= other.length; + + return result; + } + } + + auto r = Reduction(Reduction.Type.ReplaceWord); + r.total = words.length; + foreach (i, word; words) + { + r.index = i; + r.from = word; + int tries = 0; + do + r.to = idgen(word.length); + while (r.to in wordSet && tries++ < 10); + wordSet[r.to] = true; + + tryReduction(r); + } +} + +bool skipEntity(Entity e) +{ + if (e.removed) + return true; + foreach (dependency; e.dependencies) + if (skipEntity(dependency)) + return true; + return false; +} + +void dump(Entity root, ref Reduction reduction, void delegate(string) handleFile, void delegate(string) handleText) +{ + void dumpEntity(Entity e) + { + if (reduction.type == Reduction.Type.ReplaceWord) + { + if (e.isFile) + { + assert(e.head.length==0 && e.tail.length==0); + handleFile(applyReductionToPath(e.filename, reduction)); + foreach (c; e.children) + dumpEntity(c); + } + else + if (e.head) + { + assert(e.children.length==0); + if (e.head == reduction.from) + handleText(reduction.to); + else + handleText(e.head); + handleText(e.tail); + } + else + foreach (c; e.children) + dumpEntity(c); + } + else + if (e is reduction.target) + { + final switch (reduction.type) + { + case Reduction.Type.None: + case Reduction.Type.ReplaceWord: + assert(0); + case Reduction.Type.Remove: // skip this entity + return; + case Reduction.Type.Unwrap: // skip head/tail + foreach (c; e.children) + dumpEntity(c); + break; + } + } + else + if (skipEntity(e)) + return; + else + if (e.isFile) + { + handleFile(e.filename); + foreach (c; e.children) + dumpEntity(c); + } + else + { + if (e.head.length) handleText(e.head); + foreach (c; e.children) + dumpEntity(c); + if (e.tail.length) handleText(e.tail); + } + } + + debug verifyNotRemoved(root); + if (reduction.type == Reduction.Type.Remove) + markRemoved(reduction.target, true); // Needed for dependencies + + dumpEntity(root); + + if (reduction.type == Reduction.Type.Remove) + markRemoved(reduction.target, false); + debug verifyNotRemoved(root); +} + +void save(Reduction reduction, string savedir) +{ + safeMkdir(savedir); + + File o; + + void handleFile(string fn) + { + auto path = buildPath(savedir, fn); + if (!exists(dirName(path))) + safeMkdir(dirName(path)); + + if (o.isOpen) + o.close(); + o.open(path, "wb"); + } + + dump(root, reduction, &handleFile, &o.write!string); + + if (o.isOpen) + o.close(); +} + +Entity entityAt(size_t[] address) +{ + Entity e = root; + foreach (a; address) + e = e.children[a]; + return e; +} + +/// Try specified reduction. If it succeeds, apply it permanently and save intermediate result. +bool tryReduction(Reduction r) +{ + if (test(r)) + { + foundAnything = true; + debug + auto hashBefore = hash(r); + applyReduction(r); + debug + { + auto hashAfter = hash(nullReduction); + assert(hashBefore == hashAfter, "Reduction preview/application mismatch"); + } + saveResult(); + return true; + } + return false; +} + +void verifyNotRemoved(Entity e) +{ + assert(!e.removed); + foreach (c; e.children) + verifyNotRemoved(c); +} + +void markRemoved(Entity e, bool value) +{ + assert(e.removed == !value); + e.removed = value; + foreach (c; e.children) + markRemoved(c, value); +} + +/// Permanently apply specified reduction to set. 
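+/// For Remove reductions, the cached descendant counts along the address path
+/// are adjusted incrementally (verified by checkDescendants in debug builds),
+/// so progress reporting stays accurate without a full recount.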
+void applyReduction(ref Reduction r)
+{
+	final switch (r.type)
+	{
+		case Reduction.Type.None:
+			return;
+		case Reduction.Type.ReplaceWord:
+		{
+			foreach (ref f; root.children)
+			{
+				f.filename = applyReductionToPath(f.filename, r);
+				foreach (ref entity; f.children)
+					if (entity.head == r.from)
+						entity.head = r.to;
+			}
+			return;
+		}
+		case Reduction.Type.Remove:
+		{
+			debug verifyNotRemoved(root);
+
+			markRemoved(entityAt(r.address), true);
+
+			if (r.address.length)
+			{
+				auto casualties = entityAt(r.address).descendants;
+				foreach (n; 0..r.address.length)
+					entityAt(r.address[0..n]).descendants -= casualties;
+
+				auto p = entityAt(r.address[0..$-1]);
+				p.children = remove(p.children, r.address[$-1]);
+			}
+			else
+				root = new Entity();
+
+			debug verifyNotRemoved(root);
+			debug checkDescendants(root);
+			return;
+		}
+		case Reduction.Type.Unwrap:
+			with (entityAt(r.address))
+				head = tail = null;
+			return;
+	}
+}
+
+string applyReductionToPath(string path, Reduction reduction)
+{
+	if (reduction.type == Reduction.Type.ReplaceWord)
+	{
+		Entity[] words = parseToWords(path);
+		string result;
+		foreach (i, word; words)
+		{
+			if (i > 0 && i == words.length-1 && words[i-1].tail.endsWith("."))
+				result ~= word.head; // skip extension
+			else
+			if (word.head == reduction.from)
+				result ~= reduction.to;
+			else
+				result ~= word.head;
+			result ~= word.tail;
+		}
+		return result;
+	}
+	return path;
+}
+
+void autoRetry(void delegate() fun, string operation)
+{
+	while (true)
+		try
+		{
+			fun();
+			return;
+		}
+		catch (Exception e)
+		{
+			writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg);
+			import core.thread;
+			Thread.sleep(dur!"seconds"(1));
+			writeln("Retrying...");
+		}
+}
+
+/// Alternative way to check for file existence
+/// Files marked for deletion act as nonexistent, but still prevent creation and appear in directory listings
+bool exists2(string path)
+{
+	return array(dirEntries(dirName(path), baseName(path), SpanMode.shallow)).length > 0;
+}
+
+void deleteAny(string path)
+{
+	if (exists(path))
+	{
+		if (isDir(path))
+			rmdirRecurse(path);
+		else
+			remove(path);
+	}
+	enforce(!exists(path) && !exists2(path), "Path still exists"); // Windows only marks locked directories for deletion
+}
+
+void safeDelete(string path) { autoRetry({deleteAny(path);}, "delete " ~ path); }
+void safeRename(string src, string dst) { autoRetry({rename(src, dst);}, "rename " ~ src ~ " to " ~ dst); }
+void safeMkdir(string path) { autoRetry({mkdirRecurse(path);}, "mkdir " ~ path); }
+
+void safeReplace(string path, void delegate(string path) creator)
+{
+	auto tmpPath = path ~ ".inprogress";
+	if (exists(tmpPath)) safeDelete(tmpPath);
+	auto oldPath = path ~ ".old";
+	if (exists(oldPath)) safeDelete(oldPath);
+
+	{
+		scope(failure) safeDelete(tmpPath);
+		creator(tmpPath);
+	}
+
+	if (exists(path)) safeRename(path, oldPath);
+	safeRename(tmpPath, path);
+	if (exists(oldPath)) safeDelete(oldPath);
+}
+
+
+void safeSave(string savedir) { safeReplace(savedir, path => save(nullReduction, path)); }
+
+void saveResult()
+{
+	if (!noSave)
+		measure!"resultSave"({safeSave(resultDir);});
+}
+
+version(HAVE_AE)
+{
+	// Use the faster MurmurHash implementation from http://github.com/CyberShadow/ae
+	// when compiled with -version=HAVE_AE
+
+	import ae.utils.digest;
+	import ae.utils.textout;
+
+	alias MH3Digest128 HASH;
+
+	HASH hash(Reduction reduction)
+	{
+		static StringBuffer sb;
+		sb.clear();
+		auto writer = &sb.put!string;
+		dump(root, reduction, writer, writer);
+		return murmurHash3_128(sb.get());
+	}
+
+	alias digestToStringMH3
formatHash; +} +else +{ + import std.md5; + + alias ubyte[16] HASH; + + HASH hash(Reduction reduction) + { + ubyte[16] digest; + MD5_CTX context; + context.start(); + auto writer = cast(void delegate(string))&context.update; + dump(root, reduction, writer, writer); + context.finish(digest); + return digest; + } + + alias digestToString formatHash; +} + +bool[HASH] cache; + +bool test(Reduction reduction) +{ + write(reduction, " => "); stdout.flush(); + + HASH digest; + measure!"cacheHash"({ digest = hash(reduction); }); + + bool ramCached(lazy bool fallback) + { + auto cacheResult = digest in cache; + if (cacheResult) + { + // Note: as far as I can see, a cache hit for a positive reduction is not possible (except, perhaps, for a no-op reduction) + writeln(*cacheResult ? "Yes" : "No", " (cached)"); + return *cacheResult; + } + auto result = fallback; + return cache[digest] = result; + } + + bool diskCached(lazy bool fallback) + { + tests++; + + if (globalCache) + { + string cacheBase = absolutePath(buildPath(globalCache, formatHash(digest))) ~ "-"; + bool found; + + measure!"globalCache"({ found = exists(cacheBase~"0"); }); + if (found) + { + writeln("No (disk cache)"); + return false; + } + measure!"globalCache"({ found = exists(cacheBase~"1"); }); + if (found) + { + writeln("Yes (disk cache)"); + return true; + } + auto result = fallback; + measure!"globalCache"({ std.file.write(cacheBase ~ (result ? "1" : "0"), ""); }); + return result; + } + else + return fallback; + } + + bool doTest() + { + string testdir = dir ~ ".test"; + measure!"testSave"({save(reduction, testdir);}); scope(exit) measure!"clean"({safeDelete(testdir);}); + + auto lastdir = getcwd(); scope(exit) chdir(lastdir); + chdir(testdir); + + bool result; + measure!"test"({result = system(tester) == 0;}); + writeln(result ? 
"Yes" : "No"); + return result; + } + + return ramCached(diskCached(doTest())); +} + +void applyNoRemoveMagic() +{ + enum MAGIC_START = "DustMiteNoRemoveStart"; + enum MAGIC_STOP = "DustMiteNoRemoveStop"; + + bool state = false; + + bool scanString(string s) + { + if (s.length == 0) + return false; + if (s.canFind(MAGIC_START)) + state = true; + if (s.canFind(MAGIC_STOP)) + state = false; + return state; + } + + bool scan(Entity e) + { + bool removeThis; + removeThis = scanString(e.head); + foreach (c; e.children) + removeThis |= scan(c); + removeThis |= scanString(e.tail); + e.noRemove |= removeThis; + return removeThis; + } + + scan(root); +} + +void applyNoRemoveRegex(string[] noRemoveStr) +{ + auto noRemove = array(map!((string s) { return regex(s, "mg"); })(noRemoveStr)); + + void mark(Entity e) + { + e.noRemove = true; + foreach (c; e.children) + mark(c); + } + + foreach (f; root.children) + { + assert(f.isFile); + if (canFind!((a){return !match(f.filename, a).empty;})(noRemove)) + { + mark(f); + root.noRemove = true; + continue; + } + + immutable(char)*[] starts, ends; + + foreach (r; noRemove) + foreach (c; match(f.contents, r)) + { + assert(c.hit.ptr >= f.contents.ptr && c.hit.ptr < f.contents.ptr+f.contents.length); + starts ~= c.hit.ptr; + ends ~= c.hit.ptr + c.hit.length; + } + + starts.sort; + ends.sort; + + int noRemoveLevel = 0; + + bool scanString(string s) + { + if (!s.length) + return noRemoveLevel > 0; + + auto start = s.ptr; + auto end = start + s.length; + assert(start >= f.contents.ptr && end <= f.contents.ptr+f.contents.length); + + while (starts.length && starts[0] < end) + { + noRemoveLevel++; + starts = starts[1..$]; + } + bool result = noRemoveLevel > 0; + while (ends.length && ends[0] <= end) + { + noRemoveLevel--; + ends = ends[1..$]; + } + return result; + } + + bool scan(Entity e) + { + bool result = false; + if (scanString(e.head)) + result = true; + foreach (c; e.children) + if (scan(c)) + result = true; + if (scanString(e.tail)) + result = true; + if (result) + e.noRemove = root.noRemove = true; + return result; + } + + scan(f); + } +} + +void loadCoverage(string dir) +{ + void scanFile(Entity f) + { + auto fn = buildPath(dir, setExtension(baseName(f.filename), "lst")); + if (!exists(fn)) + return; + writeln("Loading coverage file ", fn); + + static bool covered(string line) + { + enforce(line.length >= 8 && line[7]=='|', "Invalid syntax in coverage file"); + line = line[0..7]; + return line != "0000000" && line != " "; + } + + auto lines = map!covered(splitLines(readText(fn))[0..$-1]); + uint line = 0; + + bool coverString(string s) + { + bool result; + foreach (char c; s) + { + result |= lines[line]; + if (c == '\n') + line++; + } + return result; + } + + bool cover(ref Entity e) + { + bool result; + result |= coverString(e.head); + foreach (ref c; e.children) + result |= cover(c); + result |= coverString(e.tail); + + e.noRemove |= result; + return result; + } + + foreach (ref c; f.children) + f.noRemove |= cover(c); + } + + void scanFiles(Entity e) + { + if (e.isFile) + scanFile(e); + else + foreach (c; e.children) + scanFiles(c); + } + + scanFiles(root); +} + +void dumpSet(string fn) +{ + auto f = File(fn, "wt"); + + string printable(string s) { return s is null ? 
"null" : `"` ~ s.replace("\\", `\\`).replace("\"", `\"`).replace("\r", `\r`).replace("\n", `\n`) ~ `"`; } + string printableFN(string s) { return "/*** " ~ s ~ " ***/"; } + + int counter; + void assignID(Entity e) + { + e.id = counter++; + foreach (c; e.children) + assignID(c); + } + assignID(root); + + bool[int] dependents; + void scanDependents(Entity e) + { + foreach (d; e.dependencies) + dependents[d.id] = true; + foreach (c; e.children) + scanDependents(c); + } + scanDependents(root); + + void print(Entity e, int depth) + { + auto prefix = replicate(" ", depth); + + // if (!fileLevel) { f.writeln(prefix, "[ ... ]"); continue; } + + f.write(prefix); + if (e.id in dependents) + f.write(e.id, " "); + if (e.dependencies.length) + { + f.write(" => "); + foreach (d; e.dependencies) + f.write(d.id, " "); + } + + if (e.children.length == 0) + { + f.writeln("[", e.noRemove ? "!" : "", " ", e.isFile ? e.filename ? printableFN(e.filename) ~ " " : null : e.head ? printable(e.head) ~ " " : null, e.tail ? printable(e.tail) ~ " " : null, "]"); + } + else + { + f.writeln("[", e.noRemove ? "!" : "", e.isPair ? " // Pair" : null); + if (e.isFile) f.writeln(prefix, " ", printableFN(e.filename)); + if (e.head) f.writeln(prefix, " ", printable(e.head)); + foreach (c; e.children) + print(c, depth+1); + if (e.tail) f.writeln(prefix, " ", printable(e.tail)); + f.writeln(prefix, "]"); + } + } + + print(root, 0); + + f.close(); +} + +void dumpText(string fn, ref Reduction r = nullReduction) +{ + auto f = File(fn, "wt"); + dump(root, r, (string) {}, &f.write!string); + f.close(); +}