Update DustMite

Commits:
* 1f0a0cd dustmite: Fix deprecated use of canFind
* 1035b15 dustmite: Improve progress indicator accuracy
* 556a9ab dsplit: Avoid stack overflow with very long lists
* 6b8f49c Avoid std.string.startsWith, which does pointless UTF-8 decoding
* 8eb5282 dsplit: Don't parse DDoc files as D source
* 403ef2f dsplit: Fix possible range violations in skipSymbol
Vladimir Panteleev 2014-01-07 12:05:01 +00:00
parent f210332a70
commit bea9b9957d
2 changed files with 135 additions and 98 deletions
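Background for 6b8f49c and 8eb5282 (a sketch only, not part of the diff below): Phobos range algorithms auto-decode narrow strings into dchar, so std.string.startsWith on a string decodes UTF-8 element by element. That work is pointless for the plain prefix checks dsplit needs, such as the new "Ddoc" test, and decoding can also choke on the invalid UTF-8 that reduced test cases may contain. The helper below is equivalent to the one the diff adds, renamed here to make clear this snippet is illustrative:

// Sketch only: auto-decoding vs. byte-wise prefix checks.
import std.range : ElementType, walkLength;

// Equivalent of the helper this commit adds to dsplit.d (renamed here).
bool startsWithBytes(in char[] big, in char[] small)
{
	return big.length >= small.length && big[0 .. small.length] == small;
}

void main()
{
	// Range algorithms see a string as a range of dchar, i.e. they decode.
	static assert(is(ElementType!string == dchar));
	assert("héllo".length == 6);     // 6 UTF-8 code units
	assert("héllo".walkLength == 5); // 5 decoded code points

	// The byte-wise check compares slices directly, no decoding involved.
	assert(startsWithBytes("Ddoc rest of file", "Ddoc"));
}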

dsplit.d

@@ -4,11 +4,12 @@
 module dsplit;
+import std.ascii;
+import std.algorithm;
+import std.array;
 import std.file;
 import std.path;
 import std.string;
-import std.ascii;
-import std.array;
 debug import std.stdio;
 class Entity
@@ -106,6 +107,12 @@ void optimize(Entity set)
 private:
+/// Override std.string nonsense, which does UTF-8 decoding
+bool startsWith(in char[] big, in char[] small) { return big.length >= small.length && big[0..small.length] == small; }
+bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }
+
+const DExtensions = [".d", ".di"];
+
 Entity loadFile(string name, string path, ParseOptions options)
 {
 	debug writeln("Loading ", path);
@@ -114,110 +121,123 @@ Entity loadFile(string name, string path, ParseOptions options)
 	result.contents = cast(string)read(path);
 	if (options.stripComments)
-		if (extension(path) == ".d" || extension(path) == ".di")
+		if (DExtensions.canFind(path.extension.toLower))
 			result.contents = stripDComments(result.contents);
 	final switch (options.mode)
 	{
 	case ParseOptions.Mode.Source:
-		switch (extension(path))
-		{
-		case ".d":
-		case ".di":
-			result.children = parseD(result.contents); return result;
+		if (DExtensions.canFind(path.extension.toLower) && !result.contents.startsWith("Ddoc"))
+			result.children = parseD(result.contents);
+		else
 		// One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
-		default:
-			result.children = [new Entity(result.contents, null, null)]; return result;
-		}
+			result.children = [new Entity(result.contents, null, null)];
+		break;
 	case ParseOptions.Mode.Words:
-		result.children = parseToWords(result.contents); return result;
+		result.children = parseToWords(result.contents);
+		break;
 	}
+	return result;
+}
+
+class EndOfInput : Throwable { this() { super(null); } }
+
+void lchop(ref string r, size_t n = 1)
+{
+	if (r.length < n)
+		throw new EndOfInput;
+	r = r[n..$];
 }
 string skipSymbol(string s, ref size_t i)
 {
-	auto start = i;
-	switch (s[i])
-	{
-	case '\'':
-		i++;
-		if (s[i] == '\\')
-			i+=2;
-		while (s[i] != '\'')
-			i++;
-		i++;
-		break;
-	case '\\':
-		i+=2;
-		break;
-	case '"':
-		if (i && s[i-1] == 'r')
-		{
-			i++;
-			while (s[i] != '"')
-				i++;
-			i++;
-		}
-		else
-		{
-			i++;
-			while (s[i] != '"')
-			{
-				if (s[i] == '\\')
-					i+=2;
-				else
-					i++;
-			}
-			i++;
-		}
-		break;
-	case '`':
-		i++;
-		while (s[i] != '`')
-			i++;
-		i++;
-		break;
-	case '/':
-		i++;
-		if (i==s.length)
-			break;
-		else
-		if (s[i] == '/')
-		{
-			while (i < s.length && s[i] != '\r' && s[i] != '\n')
-				i++;
-		}
-		else
-		if (s[i] == '*')
-		{
-			i+=3;
-			while (s[i-2] != '*' || s[i-1] != '/')
-				i++;
-		}
-		else
-		if (s[i] == '+')
-		{
-			i++;
-			int commentLevel = 1;
-			while (commentLevel)
-			{
-				if (s[i] == '/' && s[i+1]=='+')
-					commentLevel++, i+=2;
-				else
-				if (s[i] == '+' && s[i+1]=='/')
-					commentLevel--, i+=2;
-				else
-					i++;
-			}
-		}
-		else
-			i++;
-		break;
-	default:
-		i++;
-		break;
-	}
-	return s[start..i];
+	auto r = s[i..$];
+	try
+		switch (r[0])
+		{
+		case '\'':
+			r.lchop();
+			if (r.startsWith('\\'))
+				r.lchop(2);
+			while (!r.startsWith('\''))
+				r.lchop();
+			r.lchop();
+			break;
+		case '\\':
+			r.lchop(2);
+			break;
+		case '"':
+			r.lchop();
+			while (!r.startsWith('"'))
+			{
+				if (r.startsWith('\\'))
+					r.lchop(2);
+				else
+					r.lchop();
+			}
+			r.lchop();
+			break;
+		case 'r':
+			if (r.startsWith(`r"`))
+			{
+				r.lchop(2);
+				while (!r.startsWith('"'))
+					r.lchop();
+				r.lchop();
+				break;
+			}
+			else
+				goto default;
+		case '`':
+			r.lchop();
+			while (!r.startsWith('`'))
+				r.lchop();
+			r.lchop();
+			break;
+		case '/':
+			r.lchop();
+			if (r.startsWith('/'))
+			{
+				while (!r.startsWith('\r') && !r.startsWith('\n'))
+					r.lchop();
+			}
+			else
+			if (r.startsWith('*'))
+			{
+				r.lchop();
+				while (!r.startsWith("*/"))
+					r.lchop();
+				r.lchop(2);
+			}
+			else
+			if (r.startsWith('+'))
+			{
+				r.lchop();
+				int commentLevel = 1;
+				while (commentLevel)
+				{
+					if (r.startsWith("/+"))
+						commentLevel++, r.lchop(2);
+					else
+					if (r.startsWith("+/"))
+						commentLevel--, r.lchop(2);
+					else
+						r.lchop();
+				}
+			}
+			else
+				r.lchop();
+			break;
+		default:
+			r.lchop();
+			break;
+		}
+	catch (EndOfInput)
+		r = null;
+	auto len = s.length - i - r.length;
+	i += len;
+	return s[i-len..i];
 }
 /// Moves i forward over first series of EOL characters, or until first non-whitespace character
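The skipSymbol rewrite above (403ef2f) also changes the failure mode for unterminated tokens: the old index loops could walk past the end of the buffer and trigger a range violation, while lchop throws EndOfInput, which skipSymbol catches to consume the remainder instead. A hypothetical check of that behavior, written as if it were a unittest inside dsplit.d (skipSymbol is module-private):

unittest
{
	size_t i = 0;
	// An unterminated string literal: the old `while (s[i] != '"') i++;`
	// indexed past the end here; the new code catches EndOfInput and
	// simply consumes the rest of the input.
	auto tok = skipSymbol(`"never closed`, i);
	assert(tok == `"never closed`);
	assert(i == tok.length);
}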
@@ -364,13 +384,13 @@ string stripDComments(string s)
 	return result.data;
 }
-void postProcessD(ref Entity[] entities)
+void postProcessD(ref Entity[] entities, int depth=0)
 {
 	for (int i=0; i<entities.length;)
 	{
 		// Process comma-separated lists. Nest later items and add a dependency for the comma.
-		if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
+		if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == "," && depth < 100)
 		{
 			// Put the comma in its own entity, so it can have a dependency
 			auto comma = new Entity(entities[i].tail);
@@ -406,7 +426,7 @@ void postProcessD(ref Entity[] entities)
 			continue;
 		}
-		postProcessD(entities[i].children);
+		postProcessD(entities[i].children, depth+1);
 		i++;
 	}
 }
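The depth parameter added to postProcessD above is the fix for 556a9ab: nesting each later comma-separated item under the previous one makes the tree, and the recursion into it, roughly as deep as the list is long, so a huge array literal could overflow the stack; the depth < 100 guard stops nesting past 100 levels. A hypothetical way to produce such an input (the file name and sizes are made up):

import std.algorithm : map;
import std.array : join;
import std.conv : text;
import std.file : write;
import std.range : iota;

void main()
{
	// A D file with a 50 000-element array literal -- the kind of very long
	// comma-separated list that could previously exhaust the stack.
	auto list = 50_000.iota.map!text.join(", ");
	write("longlist.d", "immutable int[] big = [" ~ list ~ "];\n");
}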

dustmite.d

@@ -21,13 +21,16 @@ import std.random;
 import dsplit;
+// Issue 314 workarounds
 alias std.string.join join;
+alias std.string.startsWith startsWith;
 string dir, resultDir, tester, globalCache;
 string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
 size_t maxBreadth;
 Entity root;
+size_t origDescendants;
 bool concatPerformed;
 int tests; bool foundAnything;
 bool noSave, trace;
@@ -75,12 +78,13 @@ struct Reduction
 			foreach (i, a; address)
 			{
 				segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length);
-				foreach (c; e.children[a+1..$])
+				foreach (c; e.children[0..a])
 					progress += c.descendants;
 				progress++; // account for this node
 				e = e.children[a];
 			}
-			return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", "));
+			progress += e.descendants;
+			return format("[%5.1f%%] %s [%s]", (origDescendants-progress) * 100.0 / origDescendants, name, segments.join(binary ? "" : ", "));
 		}
 	}
 }
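For 1035b15, note the denominator change in the hunk above: origDescendants is a snapshot taken by resetProgress() (added further down in this diff) at the start of each iteration, rather than the live root.descendants, presumably so the percentage is measured against a fixed total for the whole iteration. A sketch of the shape of the new calculation, with made-up numbers (not DustMite code):

double percent(size_t origDescendants, size_t progress)
{
	return (origDescendants - progress) * 100.0 / origDescendants;
}

unittest
{
	// 1000 descendants at the start of the iteration and a progress value
	// of 250 accumulated as in the foreach above: prints as [ 75.0%].
	assert(percent(1000, 250) == 75.0);
}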
@@ -190,6 +194,7 @@ EOS");
 	optimize(root);
 	maxBreadth = getMaxBreadth(root);
 	countDescendants(root);
+	resetProgress();
 	assignID(root);
 	if (dump)
@@ -290,9 +295,15 @@ bool testAddress(size_t[] address)
 	return false;
 }
+void resetProgress()
+{
+	origDescendants = root.descendants;
+}
+
 void testLevel(int testDepth, out bool tested, out bool changed)
 {
 	tested = changed = false;
+	resetProgress();
 	enum MAX_DEPTH = 1024;
 	size_t[MAX_DEPTH] address;
@@ -328,6 +339,12 @@ void testLevel(int testDepth, out bool tested, out bool changed)
 	//writefln("Scan results: tested=%s, changed=%s", tested, changed);
 }
+void startIteration(int iterCount)
+{
+	writefln("############### ITERATION %d ################", iterCount);
+	resetProgress();
+}
+
 /// Keep going deeper until we find a successful reduction.
 /// When found, finish tests at current depth and restart from top depth (new iteration).
 /// If we reach the bottom (depth with no nodes on it), we're done.
@@ -337,7 +354,7 @@ void reduceCareful()
 	int iterCount;
 	do
 	{
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		bool changed;
 		int depth = 0;
 		do
@@ -364,7 +381,7 @@ void reduceLookback()
 	do
 	{
 		iterationChanged = false;
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		int depth = 0, maxDepth = 0;
 		bool depthTested;
@@ -404,7 +421,7 @@ void reduceInDepth()
 	do
 	{
 		changed = false;
-		writefln("############### ITERATION %d ################", iterCount++);
+		startIteration(iterCount++);
 		enum MAX_DEPTH = 1024;
 		size_t[MAX_DEPTH] address;
@@ -1000,7 +1017,7 @@ void applyNoRemoveRegex(string[] noRemoveStr)
 	foreach (f; files)
 	{
 		assert(f.isFile);
-		if (canFind!((a){return !match(f.filename, a).empty;})(noRemove))
+		if (noRemove.any!(a => !match(f.filename, a).empty))
 		{
 			mark(f);
 			root.noRemove = true;