Update DustMite

Commits:
* 1f0a0cd dustmite: Fix deprecated use of canFind
* 1035b15 dustmite: Improve progress indicator accuracy
* 556a9ab dsplit: Avoid stack overflow with very long lists
* 6b8f49c Avoid std.string.startsWith, which does pointless UTF-8 decoding
* 8eb5282 dsplit: Don't parse DDoc files as D source
* 403ef2f dsplit: Fix possible range violations in skipSymbol
Vladimir Panteleev 2014-01-07 12:05:01 +00:00
parent f210332a70
commit bea9b9957d
2 changed files with 135 additions and 98 deletions
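
The new startsWith helpers in dsplit.d (the 6b8f49c change) exist because, at the time of this patch, Phobos' startsWith iterated char[] as a range of dchar and decoded UTF-8 along the way. A minimal standalone sketch of the contrast; rawStartsWith and the sample string are illustrative only, not code from the patch:

    import std.algorithm : startsWith;

    // Illustrative only: a byte-level prefix test that never decodes UTF-8.
    bool rawStartsWith(in char[] big, in char[] small)
    {
        return big.length >= small.length && big[0 .. small.length] == small;
    }

    void main()
    {
        auto s = `r"raw string"`;
        assert(s.startsWith(`r"`));      // Phobos startsWith (decoded UTF-8 at the time of this patch)
        assert(rawStartsWith(s, `r"`));  // sketch: compares the bytes directly
    }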

dsplit.d

@@ -4,11 +4,12 @@
module dsplit;
import std.ascii;
import std.algorithm;
import std.array;
import std.file;
import std.path;
import std.string;
import std.ascii;
import std.array;
debug import std.stdio;
class Entity
@@ -106,6 +107,12 @@ void optimize(Entity set)
private:
/// Override std.string nonsense, which does UTF-8 decoding
bool startsWith(in char[] big, in char[] small) { return big.length >= small.length && big[0..small.length] == small; }
bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }
const DExtensions = [".d", ".di"];
Entity loadFile(string name, string path, ParseOptions options)
{
debug writeln("Loading ", path);
@@ -114,110 +121,123 @@ Entity loadFile(string name, string path, ParseOptions options)
result.contents = cast(string)read(path);
if (options.stripComments)
if (extension(path) == ".d" || extension(path) == ".di")
if (DExtensions.canFind(path.extension.toLower))
result.contents = stripDComments(result.contents);
final switch (options.mode)
{
case ParseOptions.Mode.Source:
switch (extension(path))
{
case ".d":
case ".di":
result.children = parseD(result.contents); return result;
if (DExtensions.canFind(path.extension.toLower) && !result.contents.startsWith("Ddoc"))
result.children = parseD(result.contents);
else
// One could add custom splitters for other languages here - for example, a simple line/word/character splitter for most text-based formats
default:
result.children = [new Entity(result.contents, null, null)]; return result;
}
result.children = [new Entity(result.contents, null, null)];
break;
case ParseOptions.Mode.Words:
result.children = parseToWords(result.contents); return result;
result.children = parseToWords(result.contents);
break;
}
return result;
}
class EndOfInput : Throwable { this() { super(null); } }
void lchop(ref string r, size_t n = 1)
{
if (r.length < n)
throw new EndOfInput;
r = r[n..$];
}
string skipSymbol(string s, ref size_t i)
{
auto start = i;
switch (s[i])
{
case '\'':
i++;
if (s[i] == '\\')
i+=2;
while (s[i] != '\'')
i++;
i++;
break;
case '\\':
i+=2;
break;
case '"':
if (i && s[i-1] == 'r')
auto r = s[i..$];
try
switch (r[0])
{
i++;
while (s[i] != '"')
i++;
i++;
}
else
{
i++;
while (s[i] != '"')
{
if (s[i] == '\\')
i+=2;
else
i++;
}
i++;
}
break;
case '`':
i++;
while (s[i] != '`')
i++;
i++;
break;
case '/':
i++;
if (i==s.length)
case '\'':
r.lchop();
if (r.startsWith('\\'))
r.lchop(2);
while (!r.startsWith('\''))
r.lchop();
r.lchop();
break;
else
if (s[i] == '/')
{
while (i < s.length && s[i] != '\r' && s[i] != '\n')
i++;
}
else
if (s[i] == '*')
{
i+=3;
while (s[i-2] != '*' || s[i-1] != '/')
i++;
}
else
if (s[i] == '+')
{
i++;
int commentLevel = 1;
while (commentLevel)
case '\\':
r.lchop(2);
break;
case '"':
r.lchop();
while (!r.startsWith('"'))
{
if (s[i] == '/' && s[i+1]=='+')
commentLevel++, i+=2;
if (r.startsWith('\\'))
r.lchop(2);
else
if (s[i] == '+' && s[i+1]=='/')
commentLevel--, i+=2;
else
i++;
r.lchop();
}
r.lchop();
break;
case 'r':
if (r.startsWith(`r"`))
{
r.lchop(2);
while (!r.startsWith('"'))
r.lchop();
r.lchop();
break;
}
else
goto default;
case '`':
r.lchop();
while (!r.startsWith('`'))
r.lchop();
r.lchop();
break;
case '/':
r.lchop();
if (r.startsWith('/'))
{
while (!r.startsWith('\r') && !r.startsWith('\n'))
r.lchop();
}
else
if (r.startsWith('*'))
{
r.lchop();
while (!r.startsWith("*/"))
r.lchop();
r.lchop(2);
}
else
if (r.startsWith('+'))
{
r.lchop();
int commentLevel = 1;
while (commentLevel)
{
if (r.startsWith("/+"))
commentLevel++, r.lchop(2);
else
if (r.startsWith("+/"))
commentLevel--, r.lchop(2);
else
r.lchop();
}
}
else
r.lchop();
break;
default:
r.lchop();
break;
}
else
i++;
break;
default:
i++;
break;
}
return s[start..i];
catch (EndOfInput)
r = null;
auto len = s.length - i - r.length;
i += len;
return s[i-len..i];
}
/// Moves i forward over first series of EOL characters, or until first non-whitespace character
@@ -364,13 +384,13 @@ string stripDComments(string s)
return result.data;
}
void postProcessD(ref Entity[] entities)
void postProcessD(ref Entity[] entities, int depth=0)
{
for (int i=0; i<entities.length;)
{
// Process comma-separated lists. Nest later items and add a dependency for the comma.
if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == ",")
if (i+2 <= entities.length && entities[i].children.length >= 1 && entities[i].tail.stripD() == "," && depth < 100)
{
// Put the comma in its own entity, so it can have a dependency
auto comma = new Entity(entities[i].tail);
@@ -404,9 +424,9 @@ void postProcessD(ref Entity[] entities)
{
entities.replaceInPlace(i, i+2, [new Entity(null, entities[i..i+2].dup, null)]);
continue;
}
}
postProcessD(entities[i].children);
postProcessD(entities[i].children, depth+1);
i++;
}
}
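
The skipSymbol rewrite above (the 403ef2f change) drops raw s[i] indexing, which could run past the end of the buffer on truncated input, in favour of consuming from the front of a slice: lchop throws EndOfInput rather than hitting a range violation, and the catch at the end treats whatever remains as consumed. A self-contained sketch of the same pattern on a single double-quoted literal; takeStringLiteral is a made-up example, not code from the patch:

    class EndOfInput : Throwable { this() { super(null); } }

    // Consume n characters from the front, signalling cleanly when the input runs out.
    void lchop(ref string r, size_t n = 1)
    {
        if (r.length < n)
            throw new EndOfInput;
        r = r[n .. $];
    }

    bool startsWith(in char[] big, char c) { return big.length && big[0] == c; }

    // Consume a double-quoted literal (with backslash escapes) from the front of s.
    // On truncated input, consume the remainder instead of crashing.
    string takeStringLiteral(string s)
    {
        auto r = s;
        try
        {
            r.lchop();                       // opening quote
            while (!r.startsWith('"'))
                r.lchop(r.startsWith('\\') ? 2 : 1);
            r.lchop();                       // closing quote
        }
        catch (EndOfInput)
            r = null;
        return s[0 .. s.length - r.length];
    }

    unittest
    {
        assert(takeStringLiteral(`"ab\"c" rest`) == `"ab\"c"`);
        assert(takeStringLiteral(`"unterminated`) == `"unterminated`); // no range violation
    }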

dustmite.d

@@ -21,13 +21,16 @@ import std.random;
import dsplit;
// Issue 314 workarounds
alias std.string.join join;
alias std.string.startsWith startsWith;
string dir, resultDir, tester, globalCache;
string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
size_t maxBreadth;
Entity root;
size_t origDescendants;
bool concatPerformed;
int tests; bool foundAnything;
bool noSave, trace;
@@ -75,12 +78,13 @@ struct Reduction
foreach (i, a; address)
{
segments[i] = binary ? text(a) : format("%d/%d", e.children.length-a, e.children.length);
foreach (c; e.children[a+1..$])
foreach (c; e.children[0..a])
progress += c.descendants;
progress++; // account for this node
e = e.children[a];
}
return format("[%5.1f%%] %s [%s]", progress * 100.0 / root.descendants, name, segments.join(binary ? "" : ", "));
progress += e.descendants;
return format("[%5.1f%%] %s [%s]", (origDescendants-progress) * 100.0 / origDescendants, name, segments.join(binary ? "" : ", "));
}
}
}
@@ -190,6 +194,7 @@ EOS");
optimize(root);
maxBreadth = getMaxBreadth(root);
countDescendants(root);
resetProgress();
assignID(root);
if (dump)
@@ -290,9 +295,15 @@ bool testAddress(size_t[] address)
return false;
}
void resetProgress()
{
origDescendants = root.descendants;
}
void testLevel(int testDepth, out bool tested, out bool changed)
{
tested = changed = false;
resetProgress();
enum MAX_DEPTH = 1024;
size_t[MAX_DEPTH] address;
@@ -328,6 +339,12 @@ void testLevel(int testDepth, out bool tested, out bool changed)
//writefln("Scan results: tested=%s, changed=%s", tested, changed);
}
void startIteration(int iterCount)
{
writefln("############### ITERATION %d ################", iterCount);
resetProgress();
}
/// Keep going deeper until we find a successful reduction.
/// When found, finish tests at current depth and restart from top depth (new iteration).
/// If we reach the bottom (depth with no nodes on it), we're done.
@@ -337,7 +354,7 @@ void reduceCareful()
int iterCount;
do
{
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
bool changed;
int depth = 0;
do
@@ -364,7 +381,7 @@ void reduceLookback()
do
{
iterationChanged = false;
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
int depth = 0, maxDepth = 0;
bool depthTested;
@@ -404,7 +421,7 @@ void reduceInDepth()
do
{
changed = false;
writefln("############### ITERATION %d ################", iterCount++);
startIteration(iterCount++);
enum MAX_DEPTH = 1024;
size_t[MAX_DEPTH] address;
@@ -1000,7 +1017,7 @@ void applyNoRemoveRegex(string[] noRemoveStr)
foreach (f; files)
{
assert(f.isFile);
if (canFind!((a){return !match(f.filename, a).empty;})(noRemove))
if (noRemove.any!(a => !match(f.filename, a).empty))
{
mark(f);
root.noRemove = true;
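
The final hunk is the canFind cleanup listed in the commits above: the predicate-style call canFind!(pred)(noRemove) becomes noRemove.any!(pred). A tiny usage sketch of the two spellings with made-up file names (not DustMite data):

    import std.algorithm : any, canFind, endsWith;

    void main()
    {
        auto files = ["main.d", "util.d", "notes.txt"];

        // Needle search: canFind remains the natural spelling.
        assert(files.canFind("util.d"));

        // Predicate search: the form the patch switches to.
        assert(files.any!(name => !name.endsWith(".d")));
    }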