/// DustMite, a general-purpose data reduction tool
/// Written by Vladimir Panteleev <vladimir@thecybershadow.net>
/// License: Boost Software License, Version 1.0
module dustmite;
import core.atomic;
import core.thread;
import std.algorithm;
import std.array;
import std.ascii;
import std.container.rbtree;
import std.conv;
import std.datetime;
import std.datetime.stopwatch : StopWatch;
import std.exception;
import std.file;
import std.getopt;
import std.math : nextPow2;
import std.path;
import std.parallelism : totalCPUs;
import std.process;
import std.random;
import std.range;
import std.regex;
import std.stdio;
import std.string;
import std.typecons;
import splitter;
alias Splitter = splitter.Splitter;
// Issue 314 workarounds
alias std.string.join join;
alias std.string.startsWith startsWith;
string dir, resultDir, tester, globalCache;
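/// E.g. with `dir` being "src", dirSuffix("reduced") yields "src.reduced",
/// normalized and made relative to the current working directory.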
string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
size_t maxBreadth;
size_t origDescendants;
int tests, maxSteps = -1; bool foundAnything;
bool noSave, trace, noRedirect, doDump, whiteout;
string strategy = "inbreadth";
struct Times { StopWatch total, load, testSave, resultSave, apply, lookaheadApply, lookaheadWaitThread, lookaheadWaitProcess, test, clean, globalCache, misc; }
Times times;
static this() { times.total.start(); times.misc.start(); }
void measure(string what)(scope void delegate() p)
{
times.misc.stop(); mixin("times."~what~".start();");
p();
mixin("times."~what~".stop();"); times.misc.start();
}
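// A minimal usage sketch: wall time spent inside the delegate is credited to
// the named StopWatch in `times` (here, `times.test`) and excluded from
// `times.misc`.
unittest
{
    int x;
    measure!"test"({ x = 1; });
    assert(x == 1 && times.test.peek() >= Duration.zero);
}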
struct Reduction
{
enum Type { None, Remove, Unwrap, Concat, ReplaceWord, Swap }
Type type;
Entity root;
// Remove / Unwrap / Concat / Swap
const(Address)* address;
// Swap
const(Address)* address2;
// ReplaceWord
string from, to;
size_t index, total;
string toString()
{
string name = .to!string(type);
final switch (type)
{
case Reduction.Type.None:
return name;
case Reduction.Type.ReplaceWord:
return format(`%s [%d/%d: %s -> %s]`, name, index+1, total, from, to);
case Reduction.Type.Remove:
case Reduction.Type.Unwrap:
case Reduction.Type.Concat:
{
auto address = addressToArr(this.address);
string[] segments = new string[address.length];
Entity e = root;
size_t progress;
bool binary = maxBreadth == 2;
foreach (i, a; address)
{
auto p = a;
foreach (c; e.children[0..a])
if (c.dead)
p--;
segments[i] = binary ? text(p) : format("%d/%d", e.children.length-a, e.children.length);
foreach (c; e.children[0..a])
progress += c.descendants;
progress++; // account for this node
e = e.children[a];
}
progress += e.descendants;
auto progressPM = (origDescendants-progress) * 1000 / origDescendants; // per-mille
return format("[%2d.%d%%] %s [%s]", progressPM/10, progressPM%10, name, segments.join(binary ? "" : ", "));
}
case Reduction.Type.Swap:
{
static string addressToString(Entity root, const(Address)* addressPtr)
{
auto address = addressToArr(findEntityEx(root, addressPtr).address);
string[] segments = new string[address.length];
bool binary = maxBreadth == 2;
Entity e = root;
foreach (i, a; address)
{
auto p = a;
foreach (c; e.children[0..a])
if (c.dead)
p--;
segments[i] = binary ? text(p) : format("%d/%d", e.children.length-a, e.children.length);
e = e.children[a];
}
return segments.join(binary ? "" : ", ");
}
return format("[%s] <-> [%s]",
addressToString(root, address),
addressToString(root, address2));
}
}
}
}
Address rootAddress;
auto nullReduction = Reduction(Reduction.Type.None);
struct RemoveRule { Regex!char regexp; string shellGlob; bool remove; }
int main(string[] args)
{
bool force, dumpHtml, showTimes, stripComments, obfuscate, fuzz, keepLength, showHelp, showVersion, noOptimize, inPlace;
string coverageDir;
RemoveRule[] removeRules;
string[] splitRules;
uint lookaheadCount, tabWidth = 8;
args = args.filter!(
(arg)
{
if (arg.startsWith("-j"))
{
arg = arg[2..$];
lookaheadCount = arg.length ? arg.to!uint : totalCPUs;
return false;
}
return true;
}).array();
getopt(args,
"force", &force,
"reduceonly|reduce-only", (string opt, string value) { removeRules ~= RemoveRule(Regex!char.init, value, true); },
"remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, true); },
"noremove|no-remove" , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, false); },
"strip-comments", &stripComments,
"whiteout|white-out", &whiteout,
"coverage", &coverageDir,
"obfuscate", &obfuscate,
"fuzz", &fuzz,
"keep-length", &keepLength,
"strategy", &strategy,
"split", &splitRules,
"dump", &doDump,
"dump-html", &dumpHtml,
"times", &showTimes,
"noredirect|no-redirect", &noRedirect,
"cache", &globalCache, // for research
"trace", &trace, // for debugging
"nosave|no-save", &noSave, // for research
"nooptimize|no-optimize", &noOptimize, // for research
"tab-width", &tabWidth,
"max-steps", &maxSteps, // for research / benchmarking
"i|in-place", &inPlace,
"h|help", &showHelp,
"V|version", &showVersion,
);
if (showVersion)
{
version (Dlang_Tools)
enum source = "dlang/tools";
else
version (Dustmite_CustomSource) // Packaging Dustmite separately for a distribution?
enum source = import("source");
else
enum source = "upstream";
writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__);
if (args.length == 1)
return 0;
}
if (showHelp || args.length == 1 || args.length>3)
{
stderr.writef(q"EOS
Usage: %s [OPTION]... PATH TESTER
PATH should be a directory containing a clean copy of the file-set to reduce.
A file path can also be specified. NAME.EXT will be treated like NAME/NAME.EXT.
TESTER should be a shell command which returns 0 for a correct reduction,
and anything else otherwise.
Supported options:
--force Force reduction of unusual files
--reduce-only MASK Only reduce paths glob-matching MASK
(may be used multiple times)
--remove REGEXP Only reduce blocks covered by REGEXP
(may be used multiple times)
--no-remove REGEXP Do not reduce blocks containing REGEXP
(may be used multiple times)
--strip-comments Attempt to remove comments from source code
--white-out Replace deleted text with spaces to preserve line numbers
--coverage DIR Load .lst files corresponding to source files from DIR
--fuzz Instead of reducing, fuzz the input by performing random
changes until TESTER returns 0
--obfuscate Instead of reducing, obfuscate the input by replacing
words with random substitutions
--keep-length Preserve word length when obfuscating
--split MASK:MODE Parse and reduce files specified by MASK using the given
splitter. Can be repeated. MODE must be one of:
%-(%s, %)
--no-redirect Don't redirect stdout/stderr streams of test command
-j[N] Use N look-ahead processes (%d by default)
EOS", args[0], splitterNames, totalCPUs);
if (!showHelp)
{
stderr.write(q"EOS
-h, --help Show this message and some less interesting options
EOS");
}
else
{
stderr.write(q"EOS
-h, --help Show this message
Less interesting options:
-V, --version Show program version
--strategy STRAT Set strategy (careful/lookback/pingpong/indepth/inbreadth)
--dump Dump parsed tree to DIR.dump file
--dump-html Dump parsed tree to DIR.html file
--times Display verbose spent time breakdown
--cache DIR Use DIR as persistent disk cache
(in addition to memory cache)
--trace Save all attempted reductions to DIR.trace
-i, --in-place Overwrite input with results
--no-save Disable saving in-progress results
--no-optimize Disable tree optimization step
(may be useful with --dump)
--max-steps N Perform no more than N steps when reducing
--tab-width N How many spaces one tab is equivalent to
(for the "indent" split mode)
EOS");
}
stderr.write(q"EOS
Full documentation can be found on the GitHub wiki:
https://github.com/CyberShadow/DustMite/wiki
EOS");
return showHelp ? 0 : 64; // EX_USAGE
}
enforce(!(stripComments && coverageDir), "Sorry, --strip-comments is not compatible with --coverage");
dir = args[1];
if (isDirSeparator(dir[$-1]))
dir = dir[0..$-1];
if (args.length>=3)
tester = args[2];
bool isDotName(string fn) { return fn.startsWith(".") && !(fn=="." || fn==".."); }
bool suspiciousFilesFound;
if (!force && isDir(dir))
foreach (string path; dirEntries(dir, SpanMode.breadth))
if (isDotName(baseName(path)) || isDotName(baseName(dirName(path))) || extension(path)==".o" || extension(path)==".obj" || extension(path)==".exe")
{
stderr.writeln("Warning: Suspicious file found: ", path);
suspiciousFilesFound = true;
}
if (suspiciousFilesFound)
stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message.");
ParseRule parseSplitRule(string rule)
{
auto p = rule.lastIndexOf(':');
enforce(p > 0, "Invalid parse rule: " ~ rule);
auto pattern = rule[0..p];
auto splitterName = rule[p+1..$];
auto splitterIndex = splitterNames.countUntil(splitterName);
enforce(splitterIndex >= 0, "Unknown splitter: " ~ splitterName);
return ParseRule(pattern, cast(Splitter)splitterIndex);
}
Entity root;
ParseOptions parseOptions;
parseOptions.stripComments = stripComments;
parseOptions.mode = obfuscate ? ParseOptions.Mode.words : ParseOptions.Mode.source;
parseOptions.rules = splitRules.map!parseSplitRule().array();
parseOptions.tabWidth = tabWidth;
measure!"load"({root = loadFiles(dir, parseOptions);});
enforce(root.children.length, "No files in specified directory");
applyNoRemoveMagic(root);
applyNoRemoveRules(root, removeRules);
applyNoRemoveDeps(root);
if (coverageDir)
loadCoverage(root, coverageDir);
if (!obfuscate && !noOptimize)
optimize(root);
maxBreadth = getMaxBreadth(root);
assignID(root);
convertRefs(root);
recalculate(root);
resetProgress(root);
if (doDump)
dumpSet(root, dirSuffix("dump"));
if (dumpHtml)
dumpToHtml(root, dirSuffix("html"));
if (tester is null)
{
writeln("No tester specified, exiting");
return 0;
}
if (inPlace)
resultDir = dir;
else
{
resultDir = dirSuffix("reduced");
if (resultDir.exists)
{
writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists");
throw new Exception("Result directory already exists");
}
}
if (!fuzz)
{
auto nullResult = test(root, [nullReduction]);
if (!nullResult.success)
{
auto testerFile = dir.buildNormalizedPath(tester);
version (Posix)
{
if (testerFile.exists && (testerFile.getAttributes() & octal!111) == 0)
writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName());
}
if (!testerFile.exists && tester.exists)
writeln("Hint: test program path should be relative to the source directory, try " ~
tester.absolutePath.relativePath(dir.absolutePath).escapeShellFileName() ~
" instead of " ~ tester.escapeShellFileName());
if (!noRedirect)
writeln("Hint: use --no-redirect to see test script output");
writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails");
throw new Exception("Initial test fails: " ~ nullResult.reason);
}
}
lookaheadProcesses = new Lookahead[lookaheadCount];
foundAnything = false;
string resultAdjective;
if (obfuscate)
{
resultAdjective = "obfuscated";
.obfuscate(root, keepLength);
}
else
if (fuzz)
{
resultAdjective = "fuzzed";
.fuzz(root);
}
else
{
resultAdjective = "reduced";
reduce(root);
}
auto duration = times.total.peek();
duration = dur!"msecs"(duration.total!"msecs"); // truncate anything below ms, users aren't interested in that
if (foundAnything)
{
if (!root.dead)
{
if (!noSave)
measure!"resultSave"({safeSave(root, resultDir);});
writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir);
}
else
{
writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set");
writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective);
}
}
else
writefln("Done in %s tests and %s; no reductions found", tests, duration);
if (showTimes)
foreach (i, t; times.tupleof)
writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek());
return 0;
}
size_t getMaxBreadth(Entity e)
{
size_t breadth = e.children.length;
foreach (child; e.children)
{
auto childBreadth = getMaxBreadth(child);
if (breadth < childBreadth)
breadth = childBreadth;
}
return breadth;
}
/// An output range which only allocates a new copy on the first write
/// that's different from a given original copy.
auto cowRange(E)(E[] arr)
{
static struct Range
{
void put(ref E item)
{
if (pos != size_t.max)
{
if (pos == arr.length || item != arr[pos])
{
arr = arr[0 .. pos];
pos = size_t.max;
// continue to append (appending to a slice will copy)
}
else
{
pos++;
return;
}
}
arr ~= item;
}
E[] get() { return pos == size_t.max ? arr : arr[0 .. pos]; }
private:
E[] arr;
size_t pos; // if size_t.max, then the copy has occurred
}
return Range(arr);
}
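// A small behavioral sketch of cowRange: echoing back the original elements
// allocates nothing, and the first divergent write triggers the copy.
unittest
{
    auto orig = [1, 2, 3];

    auto same = cowRange(orig);
    foreach (x; [1, 2, 3])
        same.put(x);
    assert(same.get() is orig); // identical prefix - no copy made

    auto diff = cowRange(orig);
    foreach (x; [1, 9, 3])
        diff.put(x);
    assert(diff.get() == [1, 9, 3]); // diverged at the second element...
    assert(diff.get() !is orig);     // ...so a copy was allocated
    assert(orig == [1, 2, 3]);       // original buffer is untouched
}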
/// Update computed fields for dirty nodes
void recalculate(Entity root)
{
// Pass 1 - length + hash (and some other calculated fields)
{
bool pass1(Entity e, Address *addr)
{
if (e.clean)
return false;
auto allDependents = e.allDependents.cowRange();
e.descendants = 1;
e.hash = e.deadHash = EntityHash.init;
e.contents = e.deadContents = null;
void putString(string s)
{
e.hash.put(s);
// There is a circular dependency here.
// Calculating the whited-out string efficiently
// requires the total length of all children,
// which is conveniently included in the hash.
// However, calculating the hash requires knowing
// the whited-out string that will be written.
// Break the cycle by calculating the hash of the
// redundantly whited-out string explicitly here.
if (whiteout)
foreach (c; s)
e.deadHash.put(c.isWhite ? c : ' ');
}
putString(e.filename);
putString(e.head);
void addDependents(R)(R range, bool fresh)
{
if (e.dead)
return;
auto oldDependents = allDependents.get();
dependentLoop:
foreach (const(Address)* d; range)
{
if (!fresh)
{
d = findEntity(root, d).address;
if (!d)
continue; // Gone
}
if (d.startsWith(addr))
continue; // Internal
// Deduplicate
foreach (o; oldDependents)
if (equal(d, o))
continue dependentLoop;
allDependents.put(d);
}
}
if (e.dead)
assert(!e.dependents);
else
{
// Update dependents' addresses
auto dependents = cowRange(e.dependents);
foreach (d; e.dependents)
{
d.address = findEntity(root, d.address).address;
if (d.address)
dependents.put(d);
}
e.dependents = dependents.get();
}
addDependents(e.dependents.map!(d => d.address), true);
foreach (i, c; e.children)
{
bool fresh = pass1(c, addr.child(i));
e.descendants += c.descendants;
e.hash.put(c.hash);
e.deadHash.put(c.deadHash);
addDependents(c.allDependents, fresh);
}
putString(e.tail);
e.allDependents = allDependents.get();
assert(e.deadHash.length == (whiteout ? e.hash.length : 0));
if (e.dead)
{
e.descendants = 0;
// Switch to the "dead" variant of this subtree's hash at this point.
// This is irreversible (in child nodes).
e.hash = e.deadHash;
}
return true;
}
pass1(root, &rootAddress);
}
// --white-out pass - calculate deadContents
if (whiteout)
{
// At the top-most dirty node, start a contiguous buffer
// which contains the concatenation of all child nodes.
char[] buf;
size_t pos;
void putString(string s)
{
foreach (char c; s)
{
if (!isWhite(c))
c = ' ';
buf[pos++] = c;
}
}
void putWhite(string s)
{
foreach (c; s)
buf[pos++] = c;
}
// This needs to run in a second pass because we
// need to know the total length of nodes first.
void passWO(Entity e, bool inFile)
{
if (e.clean)
{
if (buf)
putWhite(e.deadContents);
return;
}
inFile |= e.isFile;
assert(e.hash.length == e.deadHash.length);
bool bufStarted;
// We start a buffer even when outside of a file,
// for efficiency (use one buffer across many files).
if (!buf && e.deadHash.length)
{
buf = new char[e.deadHash.length];
pos = 0;
bufStarted = true;
}
auto start = pos;
putString(e.filename);
putString(e.head);
foreach (c; e.children)
passWO(c, inFile);
putString(e.tail);
assert(e.deadHash.length == e.hash.length);
e.deadContents = cast(string)buf[start .. pos];
if (bufStarted)
{
assert(pos == buf.length);
buf = null;
pos = 0;
}
if (inFile)
e.contents = e.deadContents;
}
passWO(root, false);
}
{
void passFinal(Entity e)
{
if (e.clean)
return;
foreach (c; e.children)
passFinal(c);
e.clean = true;
}
passFinal(root);
}
}
size_t checkDescendants(Entity e)
{
if (e.dead)
return 0;
size_t n = 1; // dead nodes were handled above
foreach (c; e.children)
n += checkDescendants(c);
assert(e.descendants == n, "Wrong descendant count: expected %d, found %d".format(e.descendants, n));
return n;
}
bool addressDead(Entity root, size_t[] address) // TODO: this function shouldn't exist
{
if (root.dead)
return true;
if (!address.length)
return false;
return addressDead(root.children[address[0]], address[1..$]);
}
struct ReductionIterator
{
Strategy strategy;
bool done = false;
Reduction.Type type = Reduction.Type.None;
Entity e;
this(Strategy strategy)
{
this.strategy = strategy;
next(false);
}
this(this)
{
strategy = strategy.dup;
}
@property ref Entity root() { return strategy.root; }
@property Reduction front() { return Reduction(type, root, strategy.front.addressFromArr); }
void nextEntity(bool success) /// Iterate strategy until the next non-dead node
{
strategy.next(success);
while (!strategy.done && root.addressDead(strategy.front))
strategy.next(false);
}
void reset()
{
strategy.reset();
type = Reduction.Type.None;
}
void next(bool success)
{
if (success && type == Reduction.Type.Concat)
reset(); // Significant changes across the tree
while (true)
{
final switch (type)
{
case Reduction.Type.None:
if (strategy.done)
{
done = true;
return;
}
e = root.entityAt(strategy.front);
if (e.noRemove)
{
nextEntity(false);
continue;
}
if (e is root && !root.children.length)
{
nextEntity(false);
continue;
}
// Try next reduction type
type = Reduction.Type.Remove;
return;
case Reduction.Type.Remove:
if (success)
{
// Next node
type = Reduction.Type.None;
nextEntity(true);
continue;
}
// Try next reduction type
type = Reduction.Type.Unwrap;
if (e.head.length && e.tail.length)
return; // Try this
else
{
success = false; // Skip
continue;
}
case Reduction.Type.Unwrap:
if (success)
{
// Next node
type = Reduction.Type.None;
nextEntity(true);
continue;
}
// Try next reduction type
type = Reduction.Type.Concat;
if (e.isFile)
return; // Try this
else
{
success = false; // Skip
continue;
}
case Reduction.Type.Concat:
// Next node
type = Reduction.Type.None;
nextEntity(success);
continue;
case Reduction.Type.ReplaceWord:
case Reduction.Type.Swap:
assert(false);
}
}
}
}
void resetProgress(Entity root)
{
origDescendants = root.descendants;
}
abstract class Strategy
{
Entity root;
uint progressGeneration = 0;
bool done = false;
void copy(Strategy result) const
{
result.root = cast()root;
result.progressGeneration = this.progressGeneration;
result.done = this.done;
}
abstract @property size_t[] front();
abstract void next(bool success);
abstract void reset(); /// Invoked by ReductionIterator for significant tree changes
int getIteration() { return -1; }
int getDepth() { return -1; }
final Strategy dup()
{
auto result = cast(Strategy)this.classinfo.create();
copy(result);
return result;
}
}
class SimpleStrategy : Strategy
{
size_t[] address;
override void copy(Strategy target) const
{
super.copy(target);
auto result = cast(SimpleStrategy)target;
result.address = this.address.dup;
}
override @property size_t[] front()
{
assert(!done, "Done");
return address;
}
override void next(bool success)
{
assert(!done, "Done");
}
}
class IterativeStrategy : SimpleStrategy
{
int iteration = 0;
bool iterationChanged;
override int getIteration() { return iteration; }
override void copy(Strategy target) const
{
super.copy(target);
auto result = cast(IterativeStrategy)target;
result.iteration = this.iteration;
result.iterationChanged = this.iterationChanged;
}
override void next(bool success)
{
super.next(success);
iterationChanged |= success;
}
void nextIteration()
{
assert(iterationChanged, "Starting new iteration after no changes");
reset();
}
override void reset()
{
iteration++;
iterationChanged = false;
address = null;
progressGeneration++;
}
}
/// Find the first address at the depth of address.length,
/// and populate address[] accordingly.
/// Return false if no address at that level could be found.
bool findAddressAtLevel(size_t[] address, Entity root)
{
if (root.dead)
return false;
if (!address.length)
return true;
foreach_reverse (i, child; root.children)
{
if (findAddressAtLevel(address[1..$], child))
{
address[0] = i;
return true;
}
}
return false;
}
/// Find the next address at the depth of address.length,
/// and update address[] accordingly.
/// Return false if no more addresses at that level could be found.
bool nextAddressInLevel(size_t[] address, Entity root)
{
if (!address.length || root.dead)
return false;
if (nextAddressInLevel(address[1..$], root.children[address[0]]))
return true;
if (!address[0])
return false;
foreach_reverse (i; 0..address[0])
{
if (findAddressAtLevel(address[1..$], root.children[i]))
{
address[0] = i;
return true;
}
}
return false;
}
/// Find the next address, starting from the given one
/// (going depth-first). Update address accordingly.
/// If descend is false, then skip addresses under the given one.
/// Return false if no more addresses could be found.
bool nextAddress(ref size_t[] address, Entity root, bool descend)
{
if (root.dead)
return false;
if (!address.length)
{
if (descend && root.children.length)
{
address ~= [root.children.length-1];
return true;
}
return false;
}
auto cdr = address[1..$];
if (nextAddress(cdr, root.children[address[0]], descend))
{
address = address[0] ~ cdr;
return true;
}
if (address[0])
{
address = [address[0] - 1];
return true;
}
return false;
}
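// A tiny traversal sketch (hypothetical two-leaf tree): addresses are visited
// depth-first, last sibling to first, matching the strategies' iteration order.
unittest
{
    auto root = new Entity;
    root.children = [new Entity, new Entity];
    size_t[] addr;
    assert(nextAddress(addr, root, true) && addr == [1]);
    assert(nextAddress(addr, root, true) && addr == [0]);
    assert(!nextAddress(addr, root, true));
}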
class LevelStrategy : IterativeStrategy
{
bool levelChanged;
bool invalid;
override int getDepth() { return cast(int)address.length; }
override void copy(Strategy target) const
{
super.copy(target);
auto result = cast(LevelStrategy)target;
result.levelChanged = this.levelChanged;
result.invalid = this.invalid;
}
override void next(bool success)
{
super.next(success);
levelChanged |= success;
}
override void nextIteration()
{
super.nextIteration();
invalid = false;
levelChanged = false;
}
final bool nextInLevel()
{
assert(!invalid, "Choose a level!");
if (nextAddressInLevel(address, root))
return true;
else
{
invalid = true;
return false;
}
}
final @property size_t currentLevel() const { return address.length; }
final bool setLevel(size_t level)
{
address.length = level;
if (findAddressAtLevel(address, root))
{
invalid = false;
levelChanged = false;
progressGeneration++;
return true;
}
else
return false;
}
}
/// Keep going deeper until we find a successful reduction.
/// When found, finish tests at current depth and restart from top depth (new iteration).
/// If we reach the bottom (depth with no nodes on it), we're done.
final class CarefulStrategy : LevelStrategy
{
override void next(bool success)
{
super.next(success);
if (!nextInLevel())
{
// End of level
if (levelChanged)
{
nextIteration();
}
else
if (!setLevel(currentLevel + 1))
{
if (iterationChanged)
nextIteration();
else
done = true;
}
}
}
}
/// Keep going deeper until we find a successful reduction.
/// When found, go up a depth level.
/// Keep going up while we find new reductions. Repeat topmost depth level as necessary.
/// Once no new reductions are found at higher depths, jump to the next unvisited depth in this iteration.
/// If we reach the bottom (depth with no nodes on it), start a new iteration.
/// If we finish an iteration without finding any reductions, we're done.
final class LookbackStrategy : LevelStrategy
{
size_t maxLevel = 0;
override void copy(Strategy target) const
{
super.copy(target);
auto result = cast(LookbackStrategy)target;
result.maxLevel = this.maxLevel;
}
override void nextIteration()
{
super.nextIteration();
maxLevel = 0;
}
override void next(bool success)
{
super.next(success);
if (!nextInLevel())
{
// End of level
if (levelChanged)
{
setLevel(currentLevel ? currentLevel - 1 : 0);
}
else
if (setLevel(maxLevel + 1))
{
maxLevel = currentLevel;
}
else
{
if (iterationChanged)
nextIteration();
else
done = true;
}
}
}
}
/// Keep going deeper until we find a successful reduction.
/// When found, go up a depth level.
/// Keep going up while we find new reductions. Repeat topmost depth level as necessary.
/// Once no new reductions are found at higher depths, start going downwards again.
/// If we reach the bottom (depth with no nodes on it), start a new iteration.
/// If we finish an iteration without finding any reductions, we're done.
final class PingPongStrategy : LevelStrategy
{
override void next(bool success)
{
super.next(success);
if (!nextInLevel())
{
// End of level
if (levelChanged)
{
setLevel(currentLevel ? currentLevel - 1 : 0);
}
else
if (!setLevel(currentLevel + 1))
{
if (iterationChanged)
nextIteration();
else
done = true;
}
}
}
}
/// Keep going deeper.
/// If we reach the bottom (depth with no nodes on it), start a new iteration.
/// If we finish an iteration without finding any reductions, we're done.
final class InBreadthStrategy : LevelStrategy
{
override void next(bool success)
{
super.next(success);
if (!nextInLevel())
{
// End of level
if (!setLevel(currentLevel + 1))
{
if (iterationChanged)
nextIteration();
else
done = true;
}
}
}
}
/// Look at every entity in the tree.
/// If we can reduce this entity, continue looking at its siblings.
/// Otherwise, recurse and look at its children.
/// End an iteration once we looked at an entire tree.
/// If we finish an iteration without finding any reductions, we're done.
final class InDepthStrategy : IterativeStrategy
{
final bool nextAddress(bool descend)
{
return .nextAddress(address, root, descend);
}
override void next(bool success)
{
super.next(success);
if (!nextAddress(!success))
{
if (iterationChanged)
nextIteration();
else
done = true;
}
}
}
ReductionIterator iter;
void reduceByStrategy(Strategy strategy)
{
int lastIteration = -1;
int lastDepth = -1;
int lastProgressGeneration = -1;
int steps = 0;
iter = ReductionIterator(strategy);
while (!iter.done)
{
if (maxSteps >= 0 && steps++ == maxSteps)
return;
if (lastIteration != strategy.getIteration())
{
writefln("############### ITERATION %d ################", strategy.getIteration());
lastIteration = strategy.getIteration();
}
if (lastDepth != strategy.getDepth())
{
writefln("============= Depth %d =============", strategy.getDepth());
lastDepth = strategy.getDepth();
}
if (lastProgressGeneration != strategy.progressGeneration)
{
resetProgress(iter.root);
lastProgressGeneration = strategy.progressGeneration;
}
auto result = tryReduction(iter.root, iter.front);
iter.next(result);
predictor.put(result);
}
}
Strategy createStrategy(string name)
{
switch (name)
{
case "careful":
return new CarefulStrategy();
case "lookback":
return new LookbackStrategy();
case "pingpong":
return new PingPongStrategy();
case "indepth":
return new InDepthStrategy();
case "inbreadth":
return new InBreadthStrategy();
default:
throw new Exception("Unknown strategy");
}
}
void reduce(ref Entity root)
{
auto strategy = createStrategy(.strategy);
strategy.root = root;
reduceByStrategy(strategy);
root = strategy.root;
}
Mt19937 rng;
void obfuscate(ref Entity root, bool keepLength)
{
bool[string] wordSet;
string[] words; // preserve file order
foreach (f; root.children)
{
foreach (entity; parseToWords(f.filename) ~ f.children)
if (entity.head.length && !isDigit(entity.head[0]))
if (entity.head !in wordSet)
{
wordSet[entity.head] = true;
words ~= entity.head;
}
}
string idgen(size_t length)
{
static const first = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // use caps to avoid collisions with reserved keywords
static const other = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
if (keepLength)
{
auto result = new char[length];
foreach (i, ref c; result)
c = (i==0 ? first : other)[uniform(0, cast(uint)$, rng)];
return assumeUnique(result);
}
else
{
static int n;
int index = n++;
string result;
result ~= first[index % $];
index /= first.length;
while (index)
{
result ~= other[index % $];
index /= other.length;
}
return result;
}
}
auto r = Reduction(Reduction.Type.ReplaceWord);
r.total = words.length;
foreach (i, word; words)
{
r.index = i;
r.from = word;
int tries = 0;
do
r.to = idgen(word.length);
while (r.to in wordSet && tries++ < 10);
wordSet[r.to] = true;
tryReduction(root, r);
}
}
void fuzz(ref Entity root)
{
debug {} else rng.seed(unpredictableSeed);
Address*[] allAddresses;
void collectAddresses(Entity e, Address* addr)
{
allAddresses ~= addr;
foreach (i, c; e.children)
collectAddresses(c, addr.child(i));
}
collectAddresses(root, &rootAddress);
while (true)
{
import std.math : log2;
auto newRoot = root;
auto numReductions = uniform(1, cast(int)log2(cast(double)allAddresses.length), rng);
Reduction[] reductions;
foreach (n; 0 .. numReductions)
{
static immutable Reduction.Type[] reductionTypes = [Reduction.Type.Swap];
auto r = Reduction(reductionTypes[uniform(0, $, rng)], newRoot);
switch (r.type)
{
case Reduction.Type.Swap:
r.address = findEntity(newRoot, allAddresses[uniform(0, $, rng)]).address;
r.address2 = findEntity(newRoot, allAddresses[uniform(0, $, rng)]).address;
if (r.address.startsWith(r.address2) ||
r.address2.startsWith(r.address))
continue;
break;
default:
assert(false);
}
newRoot = applyReduction(newRoot, r);
reductions ~= r;
}
if (newRoot is root)
continue;
auto result = test(newRoot, reductions);
if (result.success)
{
foundAnything = true;
root = newRoot;
saveResult(root);
return;
}
}
}
void dump(Writer)(Entity root, Writer writer)
{
void dumpEntity(bool inFile)(Entity e)
{
if (e.dead)
{
if (inFile && e.contents.length)
writer.handleText(e.contents[e.filename.length .. $]);
}
else
if (!inFile && e.isFile)
{
writer.handleFile(e.filename);
foreach (c; e.children)
dumpEntity!true(c);
}
else
{
if (inFile && e.head.length) writer.handleText(e.head);
foreach (c; e.children)
dumpEntity!inFile(c);
if (inFile && e.tail.length) writer.handleText(e.tail);
}
}
dumpEntity!false(root);
}
static struct FastWriter(Next) /// Accelerates Writer interface by bulking contiguous strings
{
Next next;
immutable(char)* start, end;
void finish()
{
if (start != end)
next.handleText(start[0 .. end - start]);
start = end = null;
}
void handleFile(string s)
{
finish();
next.handleFile(s);
}
void handleText(string s)
{
if (s.ptr != end)
{
finish();
start = s.ptr;
}
end = s.ptr + s.length;
}
~this() { finish(); }
}
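// A minimal sketch of the coalescing behavior, using a hypothetical collector
// as the wrapped writer: two adjacent slices of one string reach the inner
// writer as a single handleText call.
unittest
{
    static struct CollectingWriter
    {
        string[] files, texts;
        void handleFile(string s) { files ~= s; }
        void handleText(string s) { texts ~= s; }
    }
    FastWriter!CollectingWriter w;
    string s = "hello";
    w.handleText(s[0 .. 2]);
    w.handleText(s[2 .. $]); // contiguous with the previous chunk
    w.finish();
    assert(w.next.texts == ["hello"]); // delivered as one bulk write
}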
void save(Entity root, string savedir)
{
safeDelete(savedir);
safeMkdir(savedir);
static struct DiskWriter
{
string dir;
File o;
typeof(o.lockingBinaryWriter()) binaryWriter;
void handleFile(string fn)
{
static Appender!(char[]) pathBuf;
pathBuf.clear();
pathBuf.put(dir.chainPath(fn));
auto path = pathBuf.data;
if (!exists(dirName(path)))
safeMkdir(dirName(path));
o.open(cast(string)path, "wb");
binaryWriter = o.lockingBinaryWriter;
}
void handleText(string s)
{
binaryWriter.put(s);
}
}
FastWriter!DiskWriter writer;
writer.next.dir = savedir;
dump(root, &writer);
writer.finish();
}
Entity entityAt(Entity root, size_t[] address) // TODO: replace uses with findEntity and remove
{
Entity e = root;
foreach (a; address)
e = e.children[a];
return e;
}
Address* addressFromArr(size_t[] address) // TODO: replace uses with findEntity and remove
{
Address* a = &rootAddress;
foreach (index; address)
a = a.child(index);
return a;
}
size_t[] addressToArr(const(Address)* address)
{
auto result = new size_t[address.depth];
while (address.parent)
{
result[address.depth - 1] = address.index;
address = address.parent;
}
return result;
}
/// Return true if these two addresses are the same
/// (they point to the same node).
bool equal(const(Address)* a, const(Address)* b)
{
if (a is b)
return true;
if (a.depth != b.depth)
return false;
if (a.index != b.index)
return false;
assert(a.parent && b.parent); // If we are at the root node, then the address check should have passed
return equal(a.parent, b.parent);
}
alias equal = std.algorithm.comparison.equal;
/// Returns true if the `haystack` address starts with the `needle` address,
/// i.e. the entity that haystack points at is the entity that needle points at, or a descendant thereof.
bool startsWith(const(Address)* haystack, const(Address)* needle)
{
if (haystack.depth < needle.depth)
return false;
while (haystack.depth > needle.depth)
haystack = haystack.parent;
return equal(haystack, needle);
}
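// A combined sketch of the address helpers, assuming Address's
// (parent, index, depth) constructor order (as used in convertRefs below).
unittest
{
    auto a1 = new Address(&rootAddress, 2, 1); // root's child #2
    auto a2 = new Address(a1, 0, 2);           // that node's child #0
    assert(addressToArr(a2) == [2, 0]);
    assert(a2.startsWith(a1));  // a2 lies within a1's subtree
    assert(!a1.startsWith(a2));
    assert(!equal(a1, a2));
}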
/// Try specified reduction. If it succeeds, apply it permanently and save intermediate result.
bool tryReduction(ref Entity root, Reduction r)
{
Entity newRoot;
measure!"apply"({ newRoot = root.applyReduction(r); });
if (newRoot is root)
{
assert(r.type != Reduction.Type.None);
writeln(r, " => N/A");
return false;
}
if (test(newRoot, [r]).success)
{
foundAnything = true;
root = newRoot;
saveResult(root);
return true;
}
return false;
}
/// Apply a reduction to this tree, and return the resulting tree.
/// The original tree remains unchanged.
/// Copies only modified parts of the tree, and whatever references them.
Entity applyReductionImpl(Entity origRoot, ref Reduction r)
{
Entity root = origRoot;
debug static ubyte[] treeBytes(Entity e) { return (cast(ubyte*)e)[0 .. __traits(classInstanceSize, Entity)] ~ cast(ubyte[])e.children ~ e.children.map!treeBytes.join; }
debug auto origBytes = treeBytes(origRoot);
scope(exit) debug assert(origBytes == treeBytes(origRoot), "Original tree was changed!");
scope(success) debug if (root !is origRoot) assert(treeBytes(root) != origBytes, "Tree was unchanged");
debug void checkClean(Entity e) { assert(e.clean, "Found dirty node before/after reduction"); foreach (c; e.children) checkClean(c); }
debug checkClean(root);
scope(success) debug checkClean(root);
static struct EditResult
{
Entity entity; /// Entity at address. Never null.
bool dead; /// Entity or one of its parents is dead.
}
EditResult editImpl(const(Address)* addr)
{
Entity* pEntity;
bool dead;
if (addr.parent)
{
auto result = editImpl(addr.parent);
auto parent = result.entity;
pEntity = &parent.children[addr.index];
dead = result.dead;
}
else
{
pEntity = &root;
dead = false;
}
auto oldEntity = *pEntity;
if (oldEntity.redirect)
{
assert(oldEntity.dead);
return editImpl(oldEntity.redirect);
}
dead |= oldEntity.dead;
// We can avoid copying the entity if it (or a parent) is dead, because edit()
// will not allow such an entity to be returned back to applyReduction.
if (!oldEntity.clean || dead)
return EditResult(oldEntity, dead);
auto newEntity = oldEntity.dup();
newEntity.clean = false;
*pEntity = newEntity;
return EditResult(newEntity, dead);
}
Entity edit(const(Address)* addr) /// Returns a writable copy of the entity at the given Address
{
auto r = editImpl(addr);
return r.dead ? null : r.entity;
}
final switch (r.type)
{
case Reduction.Type.None:
break;
case Reduction.Type.ReplaceWord:
foreach (i; 0 .. root.children.length)
{
auto fa = rootAddress.children[i];
auto f = edit(fa);
f.filename = applyReductionToPath(f.filename, r);
foreach (j, const word; f.children)
if (word.head == r.from)
edit(fa.children[j]).head = r.to;
}
break;
case Reduction.Type.Remove:
{
assert(!findEntity(root, r.address).entity.dead, "Trying to remove a tombstone");
void remove(const(Address)* address)
{
auto n = edit(address);
if (!n)
return; // This dependency was removed by something else
n.dead = true; // Mark as dead early, so that we don't waste time processing dependencies under this node
foreach (dep; n.allDependents)
remove(dep);
n.kill(); // Convert to tombstone
}
remove(r.address);
break;
}
case Reduction.Type.Unwrap:
{
assert(!findEntity(root, r.address).entity.dead, "Trying to unwrap a tombstone");
bool changed;
with (edit(r.address))
foreach (value; [&head, &tail])
{
string newValue = whiteout
? cast(string)((*value).representation.map!(c => isWhite(c) ? char(c) : ' ').array)
: null;
changed |= *value != newValue;
*value = newValue;
}
if (!changed)
root = origRoot;
break;
}
case Reduction.Type.Concat:
{
// Move all nodes from all files to a single file (the target).
// Leave behind redirects.
size_t numFiles;
Entity[] allData;
Entity[Entity] tombstones; // Map from moved entity to its tombstone
// Collect the nodes to move, and leave behind tombstones.
void collect(Entity e, Address* addr)
{
if (e.dead)
return;
if (e.isFile)
{
// Skip noRemove files, except when they are the target
// (in which case they will keep their contents after the reduction).
if (e.noRemove && !equal(addr, r.address))
return;
if (!e.children.canFind!(c => !c.dead))
return; // File is empty (already concat'd?)
numFiles++;
allData ~= e.children;
auto f = edit(addr);
f.children = new Entity[e.children.length];
foreach (i; 0 .. e.children.length)
{
auto tombstone = new Entity;
tombstone.kill();
f.children[i] = tombstone;
tombstones[e.children[i]] = tombstone;
}
}
else
foreach (i, c; e.children)
collect(c, addr.child(i));
}
collect(root, &rootAddress);
// Fail the reduction if there are fewer than two files to concatenate.
if (numFiles < 2)
{
root = origRoot;
break;
}
auto n = edit(r.address);
auto temp = new Entity;
temp.children = allData;
temp.optimizeUntil!((Entity e) => e in tombstones);
// The optimize function rearranges nodes in a tree,
// so we need to do a recursive scan to find their new location.
void makeRedirects(Address* address, Entity e)
{
if (auto p = e in tombstones)
p.redirect = address; // Patch the tombstone to point to the node's new location.
else
{
assert(!e.clean); // This node was created by optimize(), it can't be clean
foreach (i, child; e.children)
makeRedirects(address.child(i), child);
}
}
foreach (i, child; temp.children)
makeRedirects(r.address.child(n.children.length + i), child);
n.children ~= temp.children;
break;
}
case Reduction.Type.Swap:
{
// Cannot swap child and parent.
assert(
!r.address.startsWith(r.address2) &&
!r.address2.startsWith(r.address),
"Invalid swap");
// Corollary: neither address may be the root address.
// Cannot swap siblings (special case).
if (equal(r.address.parent, r.address2.parent))
break;
static struct SwapSite
{
Entity source; /// Entity currently at this site's address
Entity* target; /// Where to place the other site's entity
Address* newAddress; /// Address of target
Entity tombstone; /// Redirect to the other site's swap target
}
SwapSite prepareSwap(const(Address)* address)
{
auto p = edit(address.parent);
assert(address.index < p.children.length);
SwapSite result;
// Duplicate children.
// Replace the first half with redirects to the second half.
// The second half is the same as the old children,
// except with the target node replaced with the swap target.
auto children = new Entity[p.children.length * 2];
// First half:
foreach (i; 0 .. p.children.length)
{
auto tombstone = new Entity;
tombstone.kill();
if (i == address.index)
result.tombstone = tombstone;
else
tombstone.redirect = address.parent.child(p.children.length + i);
children[i] = tombstone;
}
// Second half:
foreach (i; 0 .. p.children.length)
{
if (i == address.index)
{
result.source = p.children[i];
result.target = &children[p.children.length + i];
result.newAddress = address.parent.child(p.children.length + i);
}
else
children[p.children.length + i] = p.children[i];
}
p.children = children;
return result;
}
auto site1 = prepareSwap(r.address);
auto site2 = prepareSwap(r.address2);
void finalizeSwap(ref SwapSite thisSite, ref SwapSite otherSite)
{
assert(otherSite.source);
*thisSite.target = otherSite.source;
thisSite.tombstone.redirect = otherSite.newAddress;
}
finalizeSwap(site1, site2);
finalizeSwap(site2, site1);
break;
}
}
if (root !is origRoot)
assert(!root.clean);
recalculate(root); // Recalculate cumulative information for the part of the tree that we edited
debug checkDescendants(root);
return root;
}
/// Polyfill for object.require
static if (!__traits(hasMember, object, "require"))
ref V require(K, V)(ref V[K] aa, K key, lazy V value = V.init)
{
auto p = key in aa;
if (p)
return *p;
return aa[key] = value;
}
// std.functional.memoize evicts old entries after a hash collision.
// We have much to gain by evicting in strictly chronological order.
struct RoundRobinCache(K, V)
{
V[K] lookup;
K[] keys;
size_t pos;
void requireSize(size_t size)
{
if (keys.length >= size)
return;
T roundUpToPowerOfTwo(T)(T x) { return nextPow2(x-1); }
keys.length = roundUpToPowerOfTwo(size);
}
ref V get(ref K key, lazy V value)
{
return lookup.require(key,
{
lookup.remove(keys[pos]);
keys[pos++] = key;
if (pos == keys.length)
pos = 0;
return value;
}());
}
}
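// An illustrative sketch of the eviction order: with room for two keys,
// inserting a third evicts the oldest entry, not a hash-determined one.
unittest
{
    RoundRobinCache!(string, int) c;
    c.requireSize(2);
    string k;
    k = "a"; c.get(k, 1);
    k = "b"; c.get(k, 2);
    k = "c"; c.get(k, 3); // reuses the slot of "a", the oldest key
    assert("a" !in c.lookup && "b" in c.lookup && "c" in c.lookup);
}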
alias ReductionCacheKey = Tuple!(Entity, q{origRoot}, Reduction, q{r});
RoundRobinCache!(ReductionCacheKey, Entity) reductionCache;
Entity applyReduction(Entity origRoot, ref Reduction r)
{
if (lookaheadProcesses.length)
{
if (!reductionCache.keys)
reductionCache.requireSize(1 + lookaheadProcesses.length);
auto cacheKey = ReductionCacheKey(origRoot, r);
return reductionCache.get(cacheKey, applyReductionImpl(origRoot, r));
}
else
return applyReductionImpl(origRoot, r);
}
string applyReductionToPath(string path, Reduction reduction)
{
if (reduction.type == Reduction.Type.ReplaceWord)
{
Entity[] words = parseToWords(path);
string result;
foreach (i, word; words)
{
if (i > 0 && i == words.length-1 && words[i-1].tail.endsWith("."))
result ~= word.head; // skip extension
else
if (word.head == reduction.from)
result ~= reduction.to;
else
result ~= word.head;
result ~= word.tail;
}
return result;
}
return path;
}
void autoRetry(scope void delegate() fun, lazy const(char)[] operation)
{
while (true)
try
{
fun();
return;
}
catch (Exception e)
{
writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg);
import core.thread;
Thread.sleep(dur!"seconds"(1));
writeln("Retrying...");
}
}
void deleteAny(string path)
{
if (exists(path))
{
if (isDir(path))
rmdirRecurse(path);
else
remove(path);
}
// The ugliest hacks, only for the ugliest operating system
version (Windows)
{
/// Alternative way to check for file existence
/// Files marked for deletion act as nonexistent, but still prevent creation and appear in directory listings
bool exists2(string path)
{
return !dirEntries(dirName(path), baseName(path), SpanMode.shallow).empty;
}
enforce(!exists(path) && !exists2(path), "Path still exists"); // Windows only marks locked directories for deletion
}
}
void safeDelete(string path) { autoRetry({deleteAny(path);}, "delete " ~ path); }
void safeRename(string src, string dst) { autoRetry({rename(src, dst);}, "rename " ~ src ~ " to " ~ dst); }
void safeMkdir(in char[] path) { autoRetry({mkdirRecurse(path);}, "mkdir " ~ path); }
void safeReplace(string path, void delegate(string path) creator)
{
auto tmpPath = path ~ ".inprogress";
if (exists(tmpPath)) safeDelete(tmpPath);
auto oldPath = path ~ ".old";
if (exists(oldPath)) safeDelete(oldPath);
{
scope(failure) safeDelete(tmpPath);
creator(tmpPath);
}
if (exists(path)) safeRename(path, oldPath);
safeRename(tmpPath, path);
if (exists(oldPath)) safeDelete(oldPath);
}
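// A minimal filesystem sketch (hypothetical path under tempDir): each version
// is staged at PATH.inprogress and only then swapped in, so PATH itself is
// never observed half-written.
unittest
{
    auto path = buildPath(tempDir, "dustmite.safeReplace.test");
    scope(exit) safeDelete(path);
    safeReplace(path, delegate(string p) { std.file.write(p, "v1"); });
    safeReplace(path, delegate(string p) { std.file.write(p, "v2"); });
    assert(readText(path) == "v2");
}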
void safeSave(Entity root, string savedir) { safeReplace(savedir, path => save(root, path)); }
void saveResult(Entity root)
{
if (!noSave)
measure!"resultSave"({safeSave(root, resultDir);});
}
struct Lookahead
{
Thread thread;
shared Pid pid;
string testdir;
EntityHash digest;
}
Lookahead[] lookaheadProcesses;
TestResult[EntityHash] lookaheadResults;
struct AccumulatingPredictor(double exp)
{
double r = 0.5;
void put(bool outcome)
{
r = (1 - exp) * r + exp * outcome;
}
double predict()
{
return r;
}
}
// Parameters found through empirical testing (gradient descent)
alias Predictor = AccumulatingPredictor!(0.01);
Predictor predictor;
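// A numeric sketch of the accumulator: each observed outcome pulls the
// estimate exp (here 1%) of the way toward 1 (success) or 0 (failure).
unittest
{
    AccumulatingPredictor!(0.01) p;
    assert(p.predict() == 0.5);
    p.put(true);  // 0.99 * 0.5 + 0.01 = 0.505
    assert(p.predict() > 0.5);
    p.put(false); // 0.99 * 0.505 = 0.49995
    assert(p.predict() < 0.505);
}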
version (Windows)
enum nullFileName = "nul";
else
enum nullFileName = "/dev/null";
bool[EntityHash] cache;
struct TestResult
{
bool success;
enum Source : ubyte
{
none,
tester,
lookahead,
diskCache,
ramCache,
}
Source source;
int status;
string reason()
{
final switch (source)
{
case Source.none:
assert(false);
case Source.tester:
return format("Test script %(%s%) exited with exit code %d (%s)",
[tester], status, (success ? "success" : "failure"));
case Source.lookahead:
return format("Test script %(%s%) (in lookahead) exited with exit code %d (%s)",
[tester], status, (success ? "success" : "failure"));
case Source.diskCache:
return "Test result was cached on disk as " ~ (success ? "success" : "failure");
case Source.ramCache:
return "Test result was cached in memory as " ~ (success ? "success" : "failure");
}
}
}
TestResult test(
Entity root, /// New root, with reduction already applied
Reduction[] reductions, /// For display purposes only
)
{
writef("%-(%s, %) => ", reductions); stdout.flush();
EntityHash digest = root.hash;
TestResult ramCached(lazy TestResult fallback)
{
auto cacheResult = digest in cache;
if (cacheResult)
{
// Note: as far as I can see, a cache hit for a positive reduction is not possible (except, perhaps, for a no-op reduction)
writeln(*cacheResult ? "Yes" : "No", " (cached)");
return TestResult(*cacheResult, TestResult.Source.ramCache);
}
auto result = fallback;
cache[digest] = result.success;
return result;
}
TestResult diskCached(lazy TestResult fallback)
{
tests++;
if (globalCache)
{
if (!exists(globalCache)) mkdirRecurse(globalCache);
string cacheBase = absolutePath(buildPath(globalCache, format("%016X", cast(ulong)digest.value))) ~ "-";
bool found;
measure!"globalCache"({ found = exists(cacheBase~"0"); });
if (found)
{
writeln("No (disk cache)");
return TestResult(false, TestResult.Source.diskCache);
}
measure!"globalCache"({ found = exists(cacheBase~"1"); });
if (found)
{
writeln("Yes (disk cache)");
return TestResult(true, TestResult.Source.diskCache);
}
auto result = fallback;
measure!"globalCache"({ autoRetry({ std.file.write(cacheBase ~ (result.success ? "1" : "0"), ""); }, "save result to disk cache"); });
return result;
}
else
return fallback;
}
TestResult lookahead(lazy TestResult fallback)
{
if (iter.strategy)
{
// Handle existing lookahead jobs
TestResult reap(ref Lookahead process, int status)
{
scope(success) process = Lookahead.init;
safeDelete(process.testdir);
if (process.thread)
process.thread.join(/*rethrow:*/true);
return lookaheadResults[process.digest] = TestResult(status == 0, TestResult.Source.lookahead, status);
}
foreach (ref process; lookaheadProcesses)
if (process.thread)
{
debug (DETERMINISTIC_LOOKAHEAD)
{
process.thread.join(/*rethrow:*/true);
process.thread = null;
}
auto pid = cast()atomicLoad(process.pid);
if (pid)
{
debug (DETERMINISTIC_LOOKAHEAD)
reap(process, pid.wait());
else
{
auto waitResult = pid.tryWait();
if (waitResult.terminated)
reap(process, waitResult.status);
}
}
}
static struct PredictedState
{
double probability;
ReductionIterator iter;
Predictor predictor;
}
auto initialState = new PredictedState(1.0, iter, predictor);
alias PredictionTree = RedBlackTree!(PredictedState*, (a, b) => a.probability > b.probability, true);
auto predictionTree = new PredictionTree((&initialState)[0..1]);
// Start new lookahead jobs
size_t numSteps;
foreach (ref process; lookaheadProcesses)
while (!process.thread && !predictionTree.empty)
{
auto state = predictionTree.front;
predictionTree.removeFront();
retryIter:
if (state.iter.done)
continue;
reductionCache.requireSize(lookaheadProcesses.length + ++numSteps);
auto reduction = state.iter.front;
Entity newRoot;
measure!"lookaheadApply"({ newRoot = state.iter.root.applyReduction(reduction); });
if (newRoot is state.iter.root)
{
state.iter.next(false);
goto retryIter; // inapplicable reduction
}
auto digest = newRoot.hash;
double prediction;
if (digest in cache || digest in lookaheadResults || lookaheadProcesses[].canFind!(p => p.thread && p.digest == digest))
{
if (digest in cache)
prediction = cache[digest] ? 1 : 0;
else
if (digest in lookaheadResults)
prediction = lookaheadResults[digest].success ? 1 : 0;
else
prediction = state.predictor.predict();
}
else
{
process.digest = digest;
static int counter;
process.testdir = dirSuffix("lookahead.%d".format(counter++));
// Saving and process creation are expensive.
// Don't block the main thread, use a worker thread instead.
static void runThread(Entity newRoot, ref Lookahead process, string tester)
{
process.thread = new Thread({
save(newRoot, process.testdir);
auto nul = File(nullFileName, "w+");
auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, process.testdir);
atomicStore(process.pid, cast(shared)pid);
});
process.thread.start();
}
runThread(newRoot, process, tester);
prediction = state.predictor.predict();
}
foreach (outcome; 0 .. 2)
{
auto probability = outcome ? prediction : 1 - prediction;
if (probability == 0)
continue; // no chance
probability *= state.probability; // accumulate
auto nextState = new PredictedState(probability, state.iter, state.predictor);
if (outcome)
nextState.iter.root = newRoot;
nextState.iter.next(!!outcome);
nextState.predictor.put(!!outcome);
predictionTree.insert(nextState);
}
}
// Find a result for the current test.
auto plookaheadResult = digest in lookaheadResults;
if (plookaheadResult)
{
writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)");
return *plookaheadResult;
}
foreach (ref process; lookaheadProcesses)
{
if (process.thread && process.digest == digest)
{
// Current test is already being tested in the background, wait for its result.
// Join the thread first, to guarantee that there is a pid
measure!"lookaheadWaitThread"({ process.thread.join(/*rethrow:*/true); });
process.thread = null;
auto pid = cast()atomicLoad(process.pid);
int exitCode;
measure!"lookaheadWaitProcess"({ exitCode = pid.wait(); });
auto result = reap(process, exitCode);
writeln(result.success ? "Yes" : "No", " (lookahead-wait)");
return result;
}
}
}
return fallback;
}
TestResult doTest()
{
string testdir = dirSuffix("test");
measure!"testSave"({save(root, testdir);}); scope(exit) measure!"clean"({safeDelete(testdir);});
Pid pid;
if (noRedirect)
pid = spawnShell(tester, null, Config.none, testdir);
else
{
auto nul = File(nullFileName, "w+");
pid = spawnShell(tester, nul, nul, nul, null, Config.none, testdir);
}
int status;
measure!"test"({status = pid.wait();});
auto result = TestResult(status == 0, TestResult.Source.tester, status);
writeln(result.success ? "Yes" : "No");
return result;
}
auto result = ramCached(diskCached(lookahead(doTest())));
if (trace) saveTrace(root, reductions, dirSuffix("trace"), result.success);
return result;
}
void saveTrace(Entity root, Reduction[] reductions, string dir, bool result)
{
if (!exists(dir)) mkdir(dir);
static size_t count;
string countStr = format("%08d-%(#%08d-%|%)%d",
count++,
reductions
.map!(reduction => reduction.address ? findEntityEx(root, reduction.address).entity : null)
.map!(target => target ? target.id : 0),
result ? 1 : 0);
auto traceDir = buildPath(dir, countStr);
save(root, traceDir);
if (doDump && result)
dumpSet(root, traceDir ~ ".dump");
}
void applyNoRemoveMagic(Entity root)
{
enum MAGIC_PREFIX = "DustMiteNoRemove";
enum MAGIC_START = MAGIC_PREFIX ~ "Start";
enum MAGIC_STOP = MAGIC_PREFIX ~ "Stop";
bool state = false;
bool scanString(string s)
{
if (s.length == 0)
return false;
if (s.canFind(MAGIC_START))
state = true;
if (s.canFind(MAGIC_STOP))
state = false;
return state;
}
bool scan(Entity e)
{
bool removeThis;
removeThis = scanString(e.head);
foreach (c; e.children)
removeThis |= scan(c);
removeThis |= scanString(e.tail);
e.noRemove |= removeThis;
return removeThis;
}
scan(root);
}
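// An illustrative input sketch (hypothetical file contents): the markers
// typically appear inside comments so the test case still compiles, and
// everything between them is protected from removal:
//
//     /* DustMiteNoRemoveStart */
//     int keepMe;
//     /* DustMiteNoRemoveStop */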
void applyNoRemoveRules(Entity root, RemoveRule[] removeRules)
{
if (!removeRules.length)
return;
// By default, for content not covered by any of the specified
// rules, do the opposite of the first rule.
// I.e., if the default rule is "remove only", then by default
// don't remove anything except what's specified by the rule.
bool defaultRemove = !removeRules.front.remove;
auto files = root.isFile ? [root] : root.children;
foreach (f; files)
{
assert(f.isFile);
// Check file name
bool removeFile = defaultRemove;
foreach (rule; removeRules)
{
if (
(rule.shellGlob && f.filename.globMatch(rule.shellGlob))
||
(rule.regexp !is Regex!char.init && f.filename.match(rule.regexp))
)
removeFile = rule.remove;
}
auto removeChar = new bool[f.contents.length];
removeChar[] = removeFile;
foreach (rule; removeRules)
if (rule.regexp !is Regex!char.init)
foreach (m; f.contents.matchAll(rule.regexp))
{
auto start = m.hit.ptr - f.contents.ptr;
auto end = start + m.hit.length;
removeChar[start .. end] = rule.remove;
}
bool scanString(string s)
{
if (!s.length)
return true;
auto start = s.ptr - f.contents.ptr;
auto end = start + s.length;
return removeChar[start .. end].all;
}
bool scan(Entity e)
{
bool remove = true;
remove &= scanString(e.head);
foreach (c; e.children)
remove &= scan(c);
remove &= scanString(e.tail);
if (!remove)
e.noRemove = root.noRemove = true;
return remove;
}
scan(f);
}
}
void applyNoRemoveDeps(Entity root)
{
static bool isNoRemove(Entity e)
{
if (e.noRemove)
return true;
foreach (dependent; e.dependents)
if (isNoRemove(dependent.entity))
return true;
return false;
}
// Propagate upwards
static bool fill(Entity e)
{
e.noRemove |= isNoRemove(e);
foreach (c; e.children)
e.noRemove |= fill(c);
return e.noRemove;
}
fill(root);
}
void loadCoverage(Entity root, string dir)
{
void scanFile(Entity f)
{
auto fn = buildPath(dir, setExtension(baseName(f.filename), "lst"));
if (!exists(fn))
return;
writeln("Loading coverage file ", fn);
static bool covered(string line)
{
enforce(line.length >= 8 && line[7]=='|', "Invalid syntax in coverage file");
line = line[0..7];
return line != "0000000" && line != "       ";
}
auto lines = map!covered(splitLines(readText(fn))[0..$-1]);
uint line = 0;
bool coverString(string s)
{
bool result;
foreach (char c; s)
{
result |= lines[line];
if (c == '\n')
line++;
}
return result;
}
bool cover(ref Entity e)
{
bool result;
result |= coverString(e.head);
foreach (ref c; e.children)
result |= cover(c);
result |= coverString(e.tail);
e.noRemove |= result;
return result;
}
foreach (ref c; f.children)
f.noRemove |= cover(c);
}
void scanFiles(Entity e)
{
if (e.isFile)
scanFile(e);
else
foreach (c; e.children)
scanFiles(c);
}
scanFiles(root);
}
void assignID(Entity e)
{
static int counter;
e.id = ++counter;
foreach (c; e.children)
assignID(c);
}
void convertRefs(Entity root)
{
Address*[int] addresses;
void collectAddresses(Entity e, Address* address)
{
assert(e.id !in addresses);
addresses[e.id] = address;
assert(address.children.length == 0);
foreach (i, c; e.children)
{
auto childAddress = new Address(address, i, address.depth + 1);
address.children ~= childAddress;
collectAddresses(c, childAddress);
}
assert(address.children.length == e.children.length);
}
collectAddresses(root, &rootAddress);
void convertRef(ref EntityRef r)
{
assert(r.entity && !r.address);
r.address = addresses[r.entity.id];
r.entity = null;
}
void convertRefs(Entity e)
{
foreach (ref r; e.dependents)
convertRef(r);
foreach (c; e.children)
convertRefs(c);
}
convertRefs(root);
}
struct FindResult
{
Entity entity; /// null if gone
const Address* address; /// the "real" (no redirects) address, null if gone
}
FindResult findEntity(Entity root, const(Address)* addr)
{
auto result = findEntityEx(root, addr);
if (result.dead)
return FindResult(null, null);
return FindResult(result.entity, result.address);
}
struct FindResultEx
{
Entity entity; /// never null
const Address* address; /// never null
bool dead; /// a dead node has been traversed to get here
}
static FindResultEx findEntityEx(Entity root, const(Address)* addr)
{
if (!addr.parent) // root
return FindResultEx(root, addr, root.dead);
auto r = findEntityEx(root, addr.parent);
auto e = r.entity.children[addr.index];
if (e.redirect)
{
assert(e.dead);
return findEntityEx(root, e.redirect); // shed the "dead" flag here
}
addr = r.address.child(addr.index); // apply redirects in parents
return FindResultEx(e, addr, r.dead || e.dead); // accumulate the "dead" flag
}
/// Lazily iterates over an address's indices, from the deepest node up to (but excluding) the root.
struct AddressRange
{
const(Address)* address;
bool empty() { return !address.parent; }
size_t front() { return address.index; }
void popFront() { address = address.parent; }
}
void dumpSet(Entity root, string fn)
{
auto f = File(fn, "wb");
string printable(string s) { return s is null ? "null" : `"` ~ s.replace("\\", `\\`).replace("\"", `\"`).replace("\r", `\r`).replace("\n", `\n`) ~ `"`; }
string printableFN(string s) { return "/*** " ~ s ~ " ***/"; }
int[][int] dependencies;
bool[int] redirects;
void scanDependencies(Entity e)
{
foreach (d; e.dependents)
{
auto dependent = findEntityEx(root, d.address).entity;
if (dependent)
dependencies[dependent.id] ~= e.id;
}
foreach (c; e.children)
scanDependencies(c);
if (e.redirect)
{
auto target = findEntityEx(root, e.redirect).entity;
if (target)
redirects[target.id] = true;
}
}
scanDependencies(root);
void print(Entity e, int depth)
{
auto prefix = replicate(" ", depth);
// if (!fileLevel) { f.writeln(prefix, "[ ... ]"); continue; }
f.write(
prefix,
"[",
e.noRemove ? "!" : "",
e.dead ? "X" : "",
);
if (e.children.length == 0)
{
f.write(
" ",
e.redirect ? "-> " ~ text(findEntityEx(root, e.redirect).entity.id) ~ " " : "",
e.isFile ? e.filename ? printableFN(e.filename) ~ " " : null : e.head ? printable(e.head) ~ " " : null,
e.tail ? printable(e.tail) ~ " " : null,
e.comment ? "/* " ~ e.comment ~ " */ " : null,
"]"
);
}
else
{
f.writeln(e.comment ? " // " ~ e.comment : null);
if (e.isFile) f.writeln(prefix, " ", printableFN(e.filename));
if (e.head) f.writeln(prefix, " ", printable(e.head));
foreach (c; e.children)
print(c, depth+1);
if (e.tail) f.writeln(prefix, " ", printable(e.tail));
f.write(prefix, "]");
}
if (e.dependents.length || e.id in redirects || trace)
f.write(" =", e.id);
if (e.id in dependencies)
{
f.write(" =>");
foreach (d; dependencies[e.id])
f.write(" ", d);
}
f.writeln();
}
print(root, 0);
f.close();
}
void dumpToHtml(Entity root, string fn)
{
auto buf = appender!string();
void dumpText(string s)
{
foreach (c; s)
switch (c)
{
case '<':
buf.put("&lt;");
break;
case '>':
buf.put("&gt;");
break;
case '&':
buf.put("&amp;");
break;
default:
buf.put(c);
}
}
void dump(Entity e)
{
if (e.isFile)
{
buf.put("<h1>");
dumpText(e.filename);
buf.put("</h1><pre>");
foreach (c; e.children)
dump(c);
buf.put("</pre>");
}
else
{
buf.put("<span>");
dumpText(e.head);
foreach (c; e.children)
dump(c);
dumpText(e.tail);
buf.put("</span>");
}
}
buf.put(q"EOT
<style> pre span:hover { outline: 1px solid rgba(0,0,0,0.2); background-color: rgba(100,100,100,0.1); } </style>
EOT");
dump(root);
std.file.write(fn, buf.data());
}
// void dumpText(string fn, ref Reduction r = nullReduction)
// {
// auto f = File(fn, "wt");
// dump(root, r, (string) {}, &f.write!string);
// f.close();
// }
version(testsuite)
shared static this()
{
import core.runtime;
"../cov".mkdir.collectException();
dmd_coverDestPath("../cov");
dmd_coverSetMerge(true);
}