Lots of optimization. Updated GDC portion of build script

2014-01-21 23:26:23 -08:00 · 2014-01-21 23:26:23 -08:00 · 4ec5af9093
parent a060dabde7
commit 4ec5af9093
5 changed files with 132 additions and 90 deletions
--- a/build.sh
+++ b/build.sh
@ -11,7 +11,7 @@ dmd\
 	stdx/d/*.d\
 	analysis/*.d\
 	-ofdscanner\
-	-m64\
+	-m64 -g\
 	-O -release -noboundscheck -inline

 #gdc\
@ -23,9 +23,9 @@ dmd\
 #	astprinter.d\
 #	formatter.d\
 #	outliner.d\
-#	style.d\
 #	stdx/*.d\
 #	stdx/d/*.d\
+#	analysis/*.d\
 #	-O3 -frelease -fno-bounds-check\
 #	-odscanner\

@ -38,8 +38,8 @@ dmd\
 #	astprinter.d\
 #	formatter.d\
 #	outliner.d\
-#	style.d\
 #	stdx/*.d\
 #	stdx/d/*.d\
+#	analysis/*.d\
 #	-O3 -release\
 #	-oq -of=dscanner\
--- a/main.d
+++ b/main.d
@ -152,7 +152,7 @@ int main(string[] args)
 				ulong count;
 				foreach (f; expandArgs(args, recursive))
 				{
-					import core.memory;
+
 					LexerConfig config;
 					config.whitespaceBehavior = WhitespaceBehavior.skip;
 					config.stringBehavior = StringBehavior.source;
@ -162,7 +162,6 @@ int main(string[] args)
 						count += printTokenCount(stdout, f, tokens);
 					else
 						count += printLineCount(stdout, f, tokens);
-					cache.printStats();
 				}
 				writefln("total:\t%d", count);
 			}
--- a/stats.d
+++ b/stats.d
@ -32,7 +32,6 @@ pure nothrow bool isLineOfCode(IdType t)

 ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
 {
-
 	ulong c;
 	foreach (ref t; tokens)
 	{
--- a/stdx/d/lexer.d
+++ b/stdx/d/lexer.d
@ -425,7 +425,6 @@ public struct DLexer
 	public void popFront() pure
 	{
 		_popFront();
-		string comment = null;
 		switch (front.type)
 		{
 			case tok!"comment":
@ -433,7 +432,11 @@ public struct DLexer
 				{
 					import std.string;
 					if (isDocComment(front.text))
-						comment = comment == null ? front.text : format("%s\n%s", comment, front.text);
+					{
+						_front.comment = _front.comment == null
+							? front.text
+							: format("%s\n%s", _front.comment, front.text);
+					}
 					do _popFront(); while (front == tok!"comment");
 					if (front == tok!"whitespace") goto case tok!"whitespace";
 				}
@ -448,7 +451,6 @@ public struct DLexer
 			default:
 				break;
 		}
-		_front.comment = comment;
 	}


@ -715,17 +717,16 @@ public struct DLexer
 				lexExponent(type);
 				break decimalLoop;
 			case '.':
-				if (foundDot || !range.canPeek(1) || range.peek(1)[1] == '.')
+				if (foundDot || !range.canPeek(1) || range.peekAt(1) == '.')
 					break decimalLoop;
 				else
 				{
-					auto lookahead = range.peek(1);
 					// The following bit of silliness tries to tell the
 					// difference between "int dot identifier" and
 					// "double identifier".
-					if (lookahead.length == 2)
+					if (range.canPeek(1))
 					{
-						switch (lookahead[1])
+						switch (range.peekAt(1))
 						{
 						case '0': .. case '9':
 							goto doubleLiteral;
@ -1362,7 +1363,6 @@ public struct DLexer
 	Token lexIdentifier() pure nothrow
 	{
 		import std.stdio;
-		debug(1) try { writeln("lexIdentifier"); } catch (Exception e) {}
 		mixin (tokenStart);
 		uint hash = 0;
 		while (!range.empty && !isSeparating(0))
@ -1418,25 +1418,28 @@ public struct DLexer
 	{
 		if (range.front == '\n') return true;
 		if (range.front == '\r') return true;
-		auto lookahead = range.peek(3);
-		if (lookahead.length == 0) return false;
-		if (lookahead == "\u2028" || lookahead == "\u2029")
-			return true;
-		return false;
+		return (range.front & 0x80) && range.canPeek(2)
+			&& (range.peek(2) == "\u2028" || range.peek(2) == "\u2029");
 	}

-	bool isSeparating(size_t offset) const pure nothrow @safe
+	bool isSeparating(size_t offset) pure nothrow @safe // True when the byte `offset` positions ahead cannot continue an identifier or keyword.
 	{
-		auto r = range.save();
-		r.popFrontN(offset);
-		auto c = r.front;
+		if (!range.canPeek(offset)) return false; // NOTE(review): end of input is reported as non-separating, so generated code of the form `if (isSeparating(n)) ... else goto default;` sends a keyword that ends the buffer to the default handler - confirm this is intended
+		auto c = range.peekAt(offset);
+		if (c >= 'A' && c <= 'Z') return false; // letters never separate; tested before the punctuation ranges below
+		if (c >= 'a' && c <= 'z') return false;
 		if (c <= 0x2f) return true;
 		if (c >= ':' && c <= '@') return true;
 		if (c >= '[' && c <= '^') return true;
 		if (c >= '{' && c <= '~') return true;
 		if (c == '`') return true;
-		if (c & 0x80 && (r.peek(3) == "\u2028"
-			|| range.peek(3) == "\u2029")) return true;
+		if (c & 0x80) // high bit set: only the three-byte sequences for U+2028 / U+2029 count as separators
+		{
+			auto r = range; // work on a copy so the lexer's own position is not disturbed
+			r.popFrontN(offset); // fix: advance the copy; previously `range` itself was advanced and the copy was tested at the old position
+			return (r.canPeek(2) && (r.peek(2) == "\u2028"
+				|| r.peek(2) == "\u2029"));
+		}
 		return false;
 	}

--- a/stdx/lexer.d
+++ b/stdx/lexer.d
@ -193,90 +193,130 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 	alias staticTokens, alias dynamicTokens, alias pseudoTokens,
 	alias pseudoTokenHandlers, alias possibleDefaultTokens)
 {
+
+	static string generateMask(const ubyte[] arr) // Packs the leading bytes of arr into a ulong and returns it as hex-literal text.
+	{
+		import std.string; // provides format()
+		ulong u;
+		for (size_t i = 0; i < arr.length && i < 8; i++) // at most 8 bytes are used; any further bytes are ignored
+		{
+			u |= (cast(ulong) arr[i]) << (i * 8); // byte i occupies bits 8*i .. 8*i+7, so arr[0] is the least significant byte
+		}
+		return format("0x%016x", u); // always 16 zero-padded hex digits
+	}
+
+	static string generateByteMask(size_t l) // Returns, as hex-literal text, a ulong mask with the low l bytes set.
+	{
+		import std.string; // provides format()
+		return format("0x%016x", ulong.max >> ((8 - l) * 8)); // NOTE(review): l == 0 produces a shift count of 64 and l > 8 wraps the unsigned subtraction; the call in printCase passes token.length only when it is below 8
+	}
+
 	static string generateCaseStatements(string[] tokens)
 	{
 		import std.conv;
 		import std.string;

-		static string generateMask(const ubyte[] arr)
-		{
-			ulong u;
-			for (size_t i = 0; i < arr.length && i < 8; i++)
-			{
-				u |= (cast(ulong) arr[i]) << (i * 8);
-			}
-			return format("0x%016x", u);
-		}
-
-		static string generateByteMask(size_t l)
-		{
-			return format("0x%016x", ulong.max >> ((8 - l) * 8));
-		}

 		string code;
 		for (size_t i = 0; i < tokens.length; i++)
 		{
-			immutable mask = generateMask(cast (const ubyte[]) tokens[i]);
-			if (tokens[i].length >= 8)
-				code ~= "if (frontBytes == " ~ mask ~ ")\n";
-			else
-				code ~= "if ((frontBytes & " ~ generateByteMask(tokens[i].length) ~ ") == " ~ mask ~ ")\n";
-			code ~= "{\n";
-			if (staticTokens.countUntil(tokens[i]) >= 0)
+			size_t j = i + 1; // j scans forward to the end of the run of tokens that share tokens[i]'s first byte
+			size_t o = i; // NOTE(review): `o` is never read afterwards
+			while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++; // relies on equal first bytes being adjacent; the tokenSearch initializer passes a sorted array
+			code ~= format("case 0x%02x:\n", cast(ubyte) tokens[i][0]); // one case label per distinct first byte
+			code ~= printCase(tokens[i .. j]); // body of the case: match code for every token in the run
+			i = j - 1; // the loop's i++ then lands on the first token of the next run
+		}
+		return code;
+	}
+
+	static string printCase(string[] tokens) // Emits the body of one switch case: match-and-return code for every token in `tokens` (all sharing one first byte).
+	{
+		string[] t = tokens;
+		string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t)); // longest first, so a longer token is tested before a shorter token that is its prefix
+		import std.conv;
+
+		if (tokens.length == 1 && tokens[0].length == 1) // a lone one-byte token: the enclosing case label has already matched it, no frontBytes test needed
+		{
+			if (staticTokens.countUntil(tokens[0]) >= 0)
 			{
-				if (tokens[i].length <= 8)
+				return "    range.popFront();\n"
+					~ "    return Token(tok!\"" ~ escape(tokens[0]) ~ "\", null, line, column, index);\n";
+			}
+			else if (pseudoTokens.countUntil(tokens[0]) >= 0)
+			{
+				return "    return "
+					~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+					~ "();\n";
+			}
+		}
+
+		string code;
+
+		foreach (i, token; sortedTokens)
+		{
+			immutable mask = generateMask(cast (const ubyte[]) token);
+			if (token.length >= 8) // tokens of 8 or more bytes are compared against the whole 64-bit frontBytes word
+				code ~= "    if (frontBytes == " ~ mask ~ ")\n";
+			else
+				code ~= "    if ((frontBytes & " ~ generateByteMask(token.length) ~ ") == " ~ mask ~ ")\n";
+			code ~= "    {\n";
+			if (staticTokens.countUntil(token) >= 0)
+			{
+				if (token.length <= 8)
 				{
-					code ~= "    range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-					code ~= "    return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
+					code ~= "        range.popFrontN(" ~ text(token.length) ~ ");\n";
+					code ~= "        return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
 				}
 				else
 				{
-					code ~= "    assert (false); // " ~ escape(tokens[i]) ~ "\n";
+					code ~= "        pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n";
 				}
 			}
-			else if (pseudoTokens.countUntil(tokens[i]) >= 0)
+			else if (pseudoTokens.countUntil(token) >= 0)
 			{
-				if (tokens[i].length < 8)
+				if (token.length < 8)
 				{
-					code ~= "    return "
-						~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
+					code ~= "        return "
+						~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
 						~ "();\n";
 				}
 				else
 				{
-					code ~= "    if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~"\")\n";
-					code ~= "        return "
-						~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
+					code ~= "        if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\")\n"; // fix: peek(p) slices p + 1 bytes, so length - 1 selects exactly the token's bytes
+					code ~= "            return "
+						~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
 						~ "();\n";
 				}
 			}
 			else
 			{
 				// possible default
-				if (tokens[i].length < 8)
+				if (token.length < 8)
 				{
-					code ~= "    if (isSeparating(" ~ text(tokens[i].length) ~ "))\n";
-					code ~= "    {\n";
-					code ~= "        range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-					code ~= "        return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
-					code ~= "    }\n";
-					code ~= "    else\n";
-					code ~= "        goto defaultHandler;\n";
+					code ~= "        if (isSeparating(" ~ text(token.length) ~ "))\n";
+					code ~= "        {\n";
+					code ~= "            range.popFrontN(" ~ text(token.length) ~ ");\n";
+					code ~= "            return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
+					code ~= "        }\n";
+					code ~= "        else\n";
+					code ~= "            goto default;\n";
 				}
 				else
 				{
-					code ~= "    if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~"\" && isSeparating(" ~ text(tokens[i].length) ~ "))\n";
-					code ~= "    {\n";
-					code ~= "        range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-					code ~= "        return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
-					code ~= "    }\n";
-					code ~= "    else\n";
-					code ~= "        goto defaultHandler;\n";
+					code ~= "        if (range.peek(" ~ text(token.length - 1) ~ ") == \"" ~ escape(token) ~"\" && isSeparating(" ~ text(token.length) ~ "))\n"; // fix: same off-by-one as above; isSeparating still inspects the byte just past the token
+					code ~= "        {\n";
+					code ~= "            range.popFrontN(" ~ text(token.length) ~ ");\n";
+					code ~= "            return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
+					code ~= "        }\n";
+					code ~= "        else\n";
+					code ~= "            goto default;\n";
 				}
 			}
-			code ~= "}\n";
-
+			code ~= "    }\n";
 		}
+		code ~= "    else\n"; // attaches to the last generated if: nothing in this case matched
+		code ~= "        goto default;\n";
 		return code;
 	}

@ -325,15 +365,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		return retVal;
 	}

-	enum tokenSearch = generateCaseStatements(stupidToArray(sort!"a.length > b.length"(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));
+	enum tokenSearch = generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));

 	static ulong getFront(const ubyte[] arr) pure nothrow @trusted
 	{
 		import std.stdio;
 		immutable importantBits = *(cast (ulong*) arr.ptr);
 		immutable filler = ulong.max >> ((8 - arr.length) * 8);
-
-		debug(1) try { writefln("0x%016x", importantBits & filler); } catch (Exception e) {}
 		return importantBits & filler;
 	}

@ -345,10 +383,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
 		immutable size_t column = range.column;
 		immutable size_t line = range.line;
 		immutable ulong frontBytes = getFront(range.peek(7));
+		switch (frontBytes & 0x00000000_000000ff)
+		{
 		mixin(tokenSearch);
-		pragma(msg, tokenSearch);
-	defaultHandler:
-		return defaultTokenFunction();
+		/+pragma(msg, tokenSearch);+/
+		default:
+			return defaultTokenFunction();
+		}
 	}

 	LexerRange range;
@ -398,16 +439,16 @@ struct LexerRange
 			: bytes[index .. index + p + 1];
 	}

+	ubyte peekAt(size_t offset) const nothrow pure @safe // Returns the single byte `offset` positions past the current index without consuming anything.
+	{
+		return bytes[index + offset]; // no range test of its own; the callers added in this patch check canPeek(offset) first
+	}
+
 	bool canPeek(size_t p) const nothrow pure @safe
 	{
 		return index + p < bytes.length;
 	}

-	LexerRange save() const nothrow pure @safe
-	{
-		return LexerRange(bytes, index, column, line);
-	}
-
 	void popFront() pure nothrow @safe
 	{
 		index++;
@ -501,7 +542,7 @@ public:
 	}
 	body
 	{
-		memoryRequested += bytes.length;
+		debug memoryRequested += bytes.length;
 		const(Item)* found = find(bytes, hash);
 		if (found is null)
 			return intern(bytes, hash);
@ -528,7 +569,7 @@ public:
 		return items[index].str;
 	}

-	void printStats()
+	debug void printStats()
 	{
 		import std.stdio;
 		writeln("Load Factor:           ", cast(float) items.length / cast(float) buckets.length);
@ -550,7 +591,7 @@ private:
 	{
 		immutable size_t newBucketCount = items.length * 2;
 		buckets = new Item*[newBucketCount];
-		rehashCount++;
+		debug rehashCount++;
 		foreach (item; items)
 		{
 			immutable size_t newIndex = item.hash % newBucketCount;
@ -707,6 +748,6 @@ private:
 	Item*[] items;
 	Item*[] buckets;
 	Block[] blocks;
-	size_t memoryRequested;
-	uint rehashCount;
+	debug size_t memoryRequested;
+	debug uint rehashCount;
 }