Back-end cleanup and optimization in the lexer

commit c01c51a61e
parent 24a0c1bc2b
Author: Hackerpilot
Date:   2014-01-19 23:13:13 -08:00

9 changed files with 344 additions and 232 deletions

.gitmodules (vendored, 3 lines changed)

@@ -1,3 +0,0 @@
-[submodule "datapicked"]
-    path = datapicked
-    url = https://github.com/blackwhale/datapicked.git

(build script)

@@ -1,4 +1,3 @@
-#dmd *.d stdx/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner-dmd
 dmd\
 main.d\
 stats.d\
@@ -11,9 +10,36 @@ dmd\
 style.d\
 stdx/*.d\
 stdx/d/*.d\
-datapicked/dpick/buffer/*.d\
--Idatapicked\
--g -m64 -wi -ofdscanner -O -release -noboundscheck
-#ldc2 main.d stats.d imports.d highlighter.d ctags.d astprinter.d formatter.d outliner.d stdx/*.d stdx/d/*.d -of=dscanner-ldc -m64 -oq
-#ldc2 *.d stdx/d/*.d -of=dscanner -unittest -m64 -g
-#/opt/gdc/bin/gdc -O3 -odscanner-gdc -fno-bounds-check -frelease -m64 *.d stdx/d/*.d
+-ofdscanner\
+-m64\
+#gdc\
+# main.d\
+# stats.d\
+# imports.d\
+# highlighter.d\
+# ctags.d\
+# astprinter.d\
+# formatter.d\
+# outliner.d\
+# style.d\
+# stdx/*.d\
+# stdx/d/*.d\
+# -O3 -frelease -fno-bounds-check\
+# -odscanner\
+#ldc2\
+# main.d\
+# stats.d\
+# imports.d\
+# highlighter.d\
+# ctags.d\
+# astprinter.d\
+# formatter.d\
+# outliner.d\
+# style.d\
+# stdx/*.d\
+# stdx/d/*.d\
+# -O3 -release\
+# -oq -of=dscanner\

ctags.d

@@ -20,7 +20,7 @@ void printCtags(File output, string[] fileNames)
 {
     string[] tags;
     LexerConfig config;
-    StringCache* cache = new StringCache;
+    StringCache* cache = new StringCache(StringCache.defaultBucketCount);
     foreach (fileName; fileNames)
     {
         File f = File(fileName);

datapicked (submodule, deleted)

@@ -1 +0,0 @@
-Subproject commit f63a843e9c0ce8db7fd897684fe323697255d87d

main.d (14 lines changed)

@@ -10,15 +10,12 @@ import std.array;
 import std.conv;
 import std.file;
 import std.getopt;
-import std.parallelism;
 import std.path;
-import std.regex;
 import std.stdio;
 import std.range;
 import stdx.lexer;
 import stdx.d.lexer;
 import stdx.d.parser;
-import dpick.buffer.buffer;
 import highlighter;
 import stats;
@@ -93,7 +90,7 @@ int main(string[] args)
         return 1;
     }
-    StringCache* cache = new StringCache;
+    StringCache* cache = new StringCache(StringCache.defaultBucketCount);
     if (tokenDump || highlight)
     {
@@ -151,13 +148,16 @@ int main(string[] args)
         foreach (f; expandArgs(args, recursive))
         {
             import core.memory;
-            GC.disable();
-            auto tokens = byToken!(ubyte[])(readFile(f));
+            LexerConfig config;
+            config.whitespaceBehavior = WhitespaceBehavior.skip;
+            config.stringBehavior = StringBehavior.source;
+            config.commentBehavior = CommentBehavior.include;
+            auto tokens = byToken(readFile(f), config, cache);
             if (tokenCount)
                 count += printTokenCount(stdout, f, tokens);
             else
                 count += printLineCount(stdout, f, tokens);
-            GC.enable();
+            cache.printStats();
         }
         writefln("total:\t%d", count);
     }
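
The hunk above also shows the new calling convention: byToken is no longer
instantiated with a range type, and the caller supplies both a LexerConfig and
a caller-owned StringCache. A minimal sketch of that usage, assuming the
stdx.d.lexer module from this commit (the helper name is illustrative):

    import stdx.d.lexer;
    import std.stdio : writeln;

    // Hypothetical helper, not part of this commit.
    void printTokenTypes(ubyte[] sourceCode)
    {
        // One cache, sized with the new explicit bucket count, can be
        // shared across every file that gets lexed.
        StringCache* cache = new StringCache(StringCache.defaultBucketCount);
        LexerConfig config;
        config.whitespaceBehavior = WhitespaceBehavior.skip;
        config.stringBehavior = StringBehavior.source;
        config.commentBehavior = CommentBehavior.include;
        foreach (ref token; byToken(sourceCode, config, cache))
            writeln(token.type); // numeric IdType of each token
    }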

(benchmark script, deleted)

@@ -1,9 +0,0 @@
-echo -e "file\tstd.d.lexer dmd\tstd.d.lexer ldc\tstd.d.lexer gdc\tdmd"
-for i in $(ls ../phobos/std/*.d); do
-	f=$(echo $i | sed "s/.*phobos\///")
-	dmdt=$(avgtime -q -r 200 ./dscanner-dmd --tokenCount $i | grep "Median" | sed "s/.*: //")
-	ldct=$(avgtime -q -r 200 ./dscanner-ldc --tokenCount $i | grep "Median" | sed "s/.*: //")
-	gdct=$(avgtime -q -r 200 ./dscanner-gdc --tokenCount $i | grep "Median" | sed "s/.*: //")
-	gcct=$(avgtime -q -r 200 ~/src/dmd-lexer/src/dmd $i | grep "Median" | sed "s/.*: //")
-	echo -e "${f}\t${dmdt}\t${ldct}\t${gdct}\t${gcct}"
-done

stats.d

@@ -32,7 +32,12 @@ pure nothrow bool isLineOfCode(IdType t)
 ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
 {
-    ulong c = tokens.count!(a => true);
+    ulong c;
+    foreach (ref t; tokens)
+    {
+        c++;
+    }
     output.writefln("%s:\t%d", fileName, c);
     return c;
 }
@@ -40,7 +45,7 @@ ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
 ulong printLineCount(Tokens)(File output, string fileName, ref Tokens tokens)
 {
     ulong count;
-    foreach (t; tokens)
+    foreach (ref t; tokens)
     {
         if (isLineOfCode(t.type))
             ++count;
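
The counting loop above replaces tokens.count!(a => true). Together with the
ref front() introduced in stdx/lexer.d below, iterating with foreach (ref t; ...)
reads each token in place instead of copying it. A minimal sketch of the
pattern (the function name is illustrative):

    // Counting by ref: no per-element Token copy is made, since front()
    // now returns ref const(Token).
    ulong countAll(Tokens)(ref Tokens tokens)
    {
        ulong c;
        foreach (ref t; tokens)
            ++c;
        return c;
    }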

stdx/d/lexer.d

@@ -112,33 +112,6 @@ public struct LexerConfig
     CommentBehavior commentBehavior;
 }
 
-public auto byToken(R)(R range)
-{
-    LexerConfig config;
-    StringCache* cache = new StringCache;
-    return byToken(range, config, cache);
-}
-
-public auto byToken(R)(R range, StringCache* cache)
-{
-    LexerConfig config;
-    return DLexer!(R)(range, config, cache);
-}
-
-public auto byToken(R)(R range, const LexerConfig config, StringCache* cache)
-{
-    return DLexer!(R)(range, config, cache);
-}
-
-unittest
-{
-    import std.stdio;
-    auto source = cast(ubyte[]) q{ import std.stdio;}c;
-    auto tokens = byToken(source);
-    assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", tok!".",
-        tok!"identifier", tok!";"]));
-}
-
 public bool isBasicType(IdType type) nothrow pure @safe
 {
     switch (type)
@@ -396,11 +369,9 @@ public bool isProtection(IdType type) pure nothrow @safe
     }
 }
 
-public struct DLexer(R)
+public struct DLexer
 {
-    import std.conv;
     import core.vararg;
-    import dpick.buffer.buffer;
 
     private enum pseudoTokenHandlers = [
         "\"", "lexStringLiteral",
@@ -434,14 +405,12 @@ public struct DLexer(R)
         "#line", "lexSpecialTokenSequence"
     ];
 
-    mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens,
+    mixin Lexer!(IdType, Token, lexIdentifier, staticTokens,
         dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens);
 
-    private alias Mark = typeof(range).Mark;
-
-    this(R range, const LexerConfig config, StringCache* cache)
+    this(ubyte[] range, const LexerConfig config, StringCache* cache)
     {
-        this.range = LexerRange!(typeof(buffer(range)))(buffer(range));
+        this.range = LexerRange(range);
         this.config = config;
        this.cache = cache;
         popFront();
@@ -493,7 +462,7 @@ public struct DLexer(R)
         case '\t':
             return true;
         case 0xe2:
-            auto peek = range.lookahead(2);
+            auto peek = range.peek(2);
             return peek.length == 2
                 && peek[0] == 0x80
                 && (peek[1] == 0xa8 || peek[1] == 0xa9);
@@ -521,7 +490,7 @@ public struct DLexer(R)
             range.incrementLine();
             return;
         case 0xe2:
-            auto lookahead = range.lookahead(3);
+            auto lookahead = range.peek(3);
             if (lookahead.length == 3 && lookahead[1] == 0x80
                 && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9))
             {
@@ -564,7 +533,7 @@ public struct DLexer(R)
                 range.popFront();
                 break;
             case 0xe2:
-                auto lookahead = range.lookahead(3);
+                auto lookahead = range.peek(3);
                 if (lookahead.length != 3)
                     break loop;
                 if (lookahead[1] != 0x80)
@@ -590,10 +559,10 @@ public struct DLexer(R)
     Token lexNumber() pure nothrow
     {
         mixin (tokenStart);
-        auto lookahead = range.lookahead(2);
-        if (range.front == '0' && lookahead.length == 2)
+        if (range.canPeek(1) && range.front == '0')
         {
-            switch (lookahead[1])
+            auto ahead = range.peek(1)[1];
+            switch (ahead)
             {
             case 'x':
             case 'X':
@@ -619,7 +588,7 @@ public struct DLexer(R)
         return lexHex(mark, line, column, index);
     }
 
-    Token lexHex(Mark mark, size_t line, size_t column, size_t index) pure nothrow
+    Token lexHex(size_t mark, size_t line, size_t column, size_t index) pure nothrow
     {
         IdType type = tok!"intLiteral";
         bool foundDot;
@@ -654,7 +623,7 @@ public struct DLexer(R)
             case '.':
                 if (foundDot)
                     break hexLoop;
-                if (range.lookahead(1).length && range.lookahead(1)[0] == '.')
+                if (range.peek(1).length && range.peek(1)[0] == '.')
                     break hexLoop;
                 range.popFront();
                 foundDot = true;
@@ -674,7 +643,7 @@ public struct DLexer(R)
         return lexBinary(mark, line, column, index);
     }
 
-    Token lexBinary(Mark mark, size_t line, size_t column, size_t index) pure nothrow
+    Token lexBinary(size_t mark, size_t line, size_t column, size_t index) pure nothrow
     {
         IdType type = tok!"intLiteral";
         binaryLoop: while (!range.empty)
@@ -699,13 +668,13 @@ public struct DLexer(R)
             index);
     }
 
-    Token lexDecimal()
+    Token lexDecimal() pure nothrow
     {
         mixin (tokenStart);
         return lexDecimal(mark, line, column, index);
     }
 
-    Token lexDecimal(Mark mark, size_t line, size_t column, size_t index) pure nothrow
+    Token lexDecimal(size_t mark, size_t line, size_t column, size_t index) pure nothrow
     {
         bool foundDot = range.front == '.';
         IdType type = tok!"intLiteral";
@@ -748,7 +717,7 @@ public struct DLexer(R)
             case '.':
                 if (foundDot)
                     break decimalLoop;
-                auto lookahead = range.lookahead(2);
+                auto lookahead = range.peek(2);
                 if (lookahead.length == 2 && lookahead[1] == '.')
                     break decimalLoop;
                 else
@@ -1058,7 +1027,7 @@ public struct DLexer(R)
             index);
     }
 
-    void lexStringSuffix(ref IdType type) pure
+    void lexStringSuffix(ref IdType type) pure nothrow
     {
         if (range.empty)
             type = tok!"stringLiteral";
@@ -1080,8 +1049,8 @@ public struct DLexer(R)
         mixin (tokenStart);
         range.popFront();
         range.popFront();
-        Unqual!(ElementEncodingType!R) open;
-        Unqual!(ElementEncodingType!R) close;
+        ubyte open;
+        ubyte close;
         switch (range.front)
         {
         case '<':
@@ -1109,8 +1078,8 @@ public struct DLexer(R)
         }
     }
 
-    Token lexNormalDelimitedString(Mark mark, size_t line, size_t column,
-        size_t index, ElementEncodingType!R open, ElementEncodingType!R close)
+    Token lexNormalDelimitedString(size_t mark, size_t line, size_t column,
+        size_t index, ubyte open, ubyte close)
         pure nothrow
     {
         int depth = 1;
@@ -1144,7 +1113,7 @@ public struct DLexer(R)
         return Token(type, cache.cacheGet(range.slice(mark)), line, column, index);
     }
 
-    Token lexHeredocString(Mark mark, size_t line, size_t column, size_t index)
+    Token lexHeredocString(size_t mark, size_t line, size_t column, size_t index)
         pure nothrow
     {
         import std.regex;
@@ -1158,7 +1127,7 @@ public struct DLexer(R)
         if (isNewline())
         {
             popFrontWhitespaceAware();
-            if (range.lookahead(ident.text.length) == ident.text)
+            if (range.peek(ident.text.length) == ident.text)
             {
                 foreach (i ; 0 .. ident.text.length)
                     range.popFront();
@@ -1395,18 +1364,20 @@ public struct DLexer(R)
     Token lexIdentifier() pure nothrow
     {
         mixin (tokenStart);
-        while (!range.empty && !isSeparating(range.front))
+        uint hash = 0;
+        while (!range.empty && !isSeparating(0))
         {
+            hash = StringCache.hashStep(range.front, hash);
             range.popFront();
         }
-        return Token(tok!"identifier", cache.cacheGet(range.slice(mark)), line,
+        return Token(tok!"identifier", cache.cacheGet(range.slice(mark), hash), line,
             column, index);
     }
 
     Token lexDot() pure nothrow
     {
         mixin (tokenStart);
-        auto lookahead = range.lookahead(1);
+        auto lookahead = range.peek(1);
         if (lookahead.length == 0)
         {
             range.popFront();
@@ -1447,22 +1418,25 @@ public struct DLexer(R)
     {
         if (range.front == '\n') return true;
         if (range.front == '\r') return true;
-        auto lookahead = range.lookahead(3);
+        auto lookahead = range.peek(3);
         if (lookahead.length == 0) return false;
         if (lookahead == "\u2028" || lookahead == "\u2029")
             return true;
         return false;
     }
 
-    bool isSeparating(ElementType!R c) nothrow pure @safe
+    bool isSeparating(size_t offset) const pure nothrow @safe
     {
+        auto r = range.save();
+        r.popFrontN(offset);
+        auto c = r.front;
         if (c <= 0x2f) return true;
         if (c >= ':' && c <= '@') return true;
         if (c >= '[' && c <= '^') return true;
         if (c >= '{' && c <= '~') return true;
         if (c == '`') return true;
-//        if (c & 0x80 && (range.lookahead(3) == "\u2028"
-//            || range.lookahead(3) == "\u2029")) return true;
+        if (c & 0x80 && (r.peek(3) == "\u2028"
+            || range.peek(3) == "\u2029")) return true;
         return false;
    }
 
@@ -1470,17 +1444,43 @@ public struct DLexer(R)
         size_t index = range.index;
         size_t column = range.column;
         size_t line = range.line;
-        const mark = range.mark();
+        auto mark = range.mark();
     };
 
-    void error(...) pure {
+    void error(...) pure nothrow @safe {
     }
 
-    void warning(...) pure {
+    void warning(...) pure nothrow @safe {
     }
 
     StringCache* cache;
     LexerConfig config;
 }
 
+public auto byToken(ubyte[] range)
+{
+    LexerConfig config;
+    StringCache* cache = new StringCache(StringCache.defaultBucketCount);
+    return DLexer(range, config, cache);
+}
+
+public auto byToken(ubyte[] range, StringCache* cache)
+{
+    LexerConfig config;
+    return DLexer(range, config, cache);
+}
+
+public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
+{
+    return DLexer(range, config, cache);
+}
+
+unittest
+{
+    import std.stdio;
+    auto source = cast(ubyte[]) q{ import std.stdio;}c;
+    auto tokens = byToken(source);
+    assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", tok!".",
+        tok!"identifier", tok!";"]));
+}
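
One detail worth noting in lexIdentifier above: the hash is folded one byte at
a time with StringCache.hashStep while the lexer is already scanning, and
cacheGet(range.slice(mark), hash) reuses it, so interning never re-reads the
identifier's bytes. A sketch of the equivalence this relies on, assuming
hashBytes (whose body is not shown in this diff) folds bytes the same way:

    // Illustrative only: stepping the hash during the scan should match
    // what hashBytes computes over the finished slice.
    uint scanHash(const(ubyte)[] ident)
    {
        uint hash = 0;
        foreach (b; ident)
            hash = StringCache.hashStep(b, hash); // (hash ^ sbox[b]) * 3
        return hash;
    }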

stdx/lexer.d

@@ -17,8 +17,6 @@ import std.range;
 import std.traits;
 import std.conv;
 import std.math;
-import dpick.buffer.buffer;
-import dpick.buffer.traits;
 
 /**
  * Template for determining the type used for a token type. Selects the smallest
@@ -191,12 +189,13 @@ public:
     mixin (extraFields);
 }
 
-mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
+mixin template Lexer(IDType, Token, alias defaultTokenFunction,
     alias staticTokens, alias dynamicTokens, alias pseudoTokens,
     alias pseudoTokenHandlers, alias possibleDefaultTokens)
 {
     static string generateCaseStatements(string[] tokens, size_t offset = 0)
     {
+        import std.conv;
         string code;
         for (size_t i = 0; i < tokens.length; i++)
         {
@@ -216,9 +215,9 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
                     code ~= generateLeaf(tokens[i], indent ~ "    ");
                 else
                 {
-                    code ~= indent ~ "    if (range.lookahead(" ~ text(tokens[i].length) ~ ").length == 0)\n";
+                    code ~= indent ~ "    if (!range.canPeek(" ~ text(tokens[i].length) ~ "))\n";
                     code ~= indent ~ "        goto outer_default;\n";
-                    code ~= indent ~ "    if (range.lookahead(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~ "\")\n";
+                    code ~= indent ~ "    if (range.peek(" ~ text(tokens[i].length - 1) ~ ") == \"" ~ escape(tokens[i]) ~ "\")\n";
                     code ~= indent ~ "    {\n";
                     code ~= generateLeaf(tokens[i], indent ~ "        ");
                     code ~= indent ~ "    }\n";
@@ -228,11 +227,11 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
             }
             else
             {
-                code ~= indent ~ "    if (range.lookahead(" ~ text(offset + 2) ~ ").length == 0)\n";
+                code ~= indent ~ "    if (!range.canPeek(" ~ text(offset + 1) ~ "))\n";
                 code ~= indent ~ "    {\n";
                 code ~= generateLeaf(tokens[i][0 .. offset + 1], indent ~ "        ");
                 code ~= indent ~ "    }\n";
-                code ~= indent ~ "    switch (range.lookahead(" ~ text(offset + 2) ~ ")[" ~ text(offset + 1) ~ "])\n";
+                code ~= indent ~ "    switch (range.peek(" ~ text(offset + 1) ~ ")[" ~ text(offset + 1) ~ "])\n";
                 code ~= indent ~ "    {\n";
                 code ~= generateCaseStatements(tokens[i .. j], offset + 1);
                 code ~= indent ~ "    default:\n";
@@ -247,6 +246,7 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
     static string generateLeaf(string token, string indent)
     {
+        import std.conv;
         static assert (pseudoTokenHandlers.length % 2 == 0,
             "Each pseudo-token must have a matching function name.");
         string code;
@@ -262,7 +262,7 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
             code ~= indent ~ "return " ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1] ~ "();\n";
         else if (possibleDefaultTokens.countUntil(token) >= 0)
         {
-            code ~= indent ~ "if (range.lookahead(" ~ text(token.length + 1) ~ ").length == 0 || isSeparating(range.lookahead(" ~ text(token.length + 1) ~ ")[" ~ text(token.length) ~ "]))\n";
+            code ~= indent ~ "if (!range.canPeek(" ~ text(token.length + 1) ~ ") || isSeparating(" ~ text(token.length) ~ "))\n";
             code ~= indent ~ "{\n";
             if (token.length == 1)
                 code ~= indent ~ "    range.popFront();\n";
@@ -278,7 +278,7 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
         return code;
     }
 
-    const(Token) front() pure nothrow const @property
+    ref const(Token) front() pure nothrow const @property
     {
         return _front;
     }
@@ -312,7 +312,21 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
         return retVal;
     }
 
-    Token advance() pure
+    /**
+     * This only exists because the real array() can't be called at compile-time
+     */
+    static string[] stupidToArray(R)(R range)
+    {
+        string[] retVal;
+        foreach (v; range)
+            retVal ~= v;
+        return retVal;
+    }
+
+    enum loopBody = generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));
+
+    auto ref Token advance() pure
     {
         if (range.empty)
             return Token(tok!"\0");
@@ -321,54 +335,87 @@ mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
         immutable size_t line = range.line;
         lexerLoop: switch (range.front)
         {
-        mixin(generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens))));
-//        pragma(msg, generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens))));
+        mixin(loopBody);
+        /+pragma(msg, loopBody);+/
         outer_default:
         default:
             return defaultTokenFunction();
         }
     }
 
-    /**
-     * This only exists because the real array() can't be called at compile-time
-     */
-    static T[] stupidToArray(R, T = ElementType!R)(R range)
-    {
-        T[] retVal;
-        foreach (v; range)
-            retVal ~= v;
-        return retVal;
-    }
-
-    LexerRange!(typeof(buffer(R.init))) range;
+    LexerRange range;
     Token _front;
 }
 
-struct LexerRange(BufferType) if (isBuffer!BufferType)
+struct LexerRange
 {
-    this(BufferType r)
+    this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1) pure nothrow @safe
     {
-        this.range = r;
-        index = 0;
-        column = 1;
-        line = 1;
+        this.bytes = bytes;
+        this.index = index;
+        this.column = column;
+        this.line = line;
    }
 
-    void popFront() pure
+    size_t mark() const nothrow pure @safe
+    {
+        return index;
+    }
+
+    void seek(size_t m) nothrow pure @safe
+    {
+        index = m;
+    }
+
+    const(ubyte)[] slice(size_t m) const nothrow pure @safe
+    {
+        return bytes[m .. index];
+    }
+
+    bool empty() const nothrow pure @safe
+    {
+        return index >= bytes.length;
+    }
+
+    ubyte front() const nothrow pure @safe
+    {
+        return bytes[index];
+    }
+
+    const(ubyte)[] peek(size_t p) const nothrow pure @safe
+    {
+        return bytes[index .. index + p + 1];
+    }
+
+    bool canPeek(size_t p) const nothrow pure @safe
+    {
+        return index + p < bytes.length;
+    }
+
+    LexerRange save() const nothrow pure @safe
+    {
+        return LexerRange(bytes, index, column, line);
+    }
+
+    void popFront() pure nothrow @safe
     {
         index++;
         column++;
-        range.popFront();
     }
 
-    void incrementLine() pure nothrow
+    void popFrontN(size_t n) pure nothrow @safe
+    {
+        index += n;
+    }
+
+    void incrementLine() pure nothrow @safe
     {
         column = 1;
         line++;
     }
 
-    BufferType range;
-    alias range this;
+    const(ubyte)[] bytes;
     size_t index;
     size_t column;
     size_t line;
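
A note on the two new lookahead primitives, since their conventions differ by
one: peek(p) returns the current byte plus p bytes of lookahead, i.e.
bytes[index .. index + p + 1], while canPeek(p) checks that index + p is still
in bounds. This is why the generated matcher above asks for
peek(tokens[i].length - 1) when comparing against an n-byte token. A small
sketch of the semantics (buffer contents illustrative):

    auto r = LexerRange(cast(const(ubyte)[]) "while");
    assert (r.front == 'w');
    assert (r.canPeek(4));         // index + 4 is the last valid byte
    assert (!r.canPeek(5));        // one past the end
    assert (r.peek(4) == "while"); // current byte plus four more
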
@@ -388,6 +435,13 @@ struct StringCache
 {
 public:
 
+    @disable this();
+
+    this(size_t bucketCount = defaultBucketCount)
+    {
+        buckets = new Item*[bucketCount];
+    }
+
     /**
      * Equivalent to calling cache() and get().
      * ---
@@ -402,6 +456,11 @@ public:
         return get(cache(bytes));
     }
 
+    string cacheGet(const(ubyte[]) bytes, uint hash) pure nothrow @safe
+    {
+        return get(cache(bytes, hash));
+    }
+
     /**
      * Caches a string.
      * Params: bytes = the string to cache
@@ -416,6 +475,12 @@ public:
      * ---
      */
     size_t cache(const(ubyte)[] bytes) pure nothrow @safe
+    {
+        immutable uint hash = hashBytes(bytes);
+        return cache(bytes, hash);
+    }
+
+    size_t cache(const(ubyte)[] bytes, uint hash) pure nothrow @safe
     in
     {
         assert (bytes.length > 0);
@@ -426,7 +491,7 @@ public:
     }
     body
     {
-        immutable uint hash = hashBytes(bytes);
+        memoryRequested += bytes.length;
         const(Item)* found = find(bytes, hash);
         if (found is null)
             return intern(bytes, hash);
@@ -453,23 +518,58 @@ public:
         return items[index].str;
     }
 
+    void printStats()
+    {
+        import std.stdio;
+        writeln("Load Factor: ", cast(float) items.length / cast(float) buckets.length);
+        writeln("Memory used by blocks: ", blocks.length * blockSize);
+        writeln("Memory requested: ", memoryRequested);
+        writeln("rehashes: ", rehashCount);
+    }
+
+    static uint hashStep(ubyte b, uint h) pure nothrow @safe
+    {
+        return (h ^ sbox[b]) * 3;
+    }
+
+    static enum defaultBucketCount = 2048;
+
 private:
 
-    size_t intern(const(ubyte)[] bytes, uint hash) pure nothrow @safe
+    private void rehash() pure nothrow @safe
+    {
+        immutable size_t newBucketCount = items.length * 2;
+        buckets = new Item*[newBucketCount];
+        rehashCount++;
+        foreach (item; items)
+        {
+            immutable size_t newIndex = item.hash % newBucketCount;
+            item.next = buckets[newIndex];
+            buckets[newIndex] = item;
+        }
+    }
+
+    size_t intern(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
     {
-        Item* item = new Item;
-        item.hash = hash;
-        item.str = allocate(bytes);
+        ubyte[] mem = allocate(bytes.length);
+        mem[] = bytes[];
+        Item* item = cast(Item*) allocate(Item.sizeof).ptr;
         item.index = items.length;
+        item.str = cast(string) mem;
+        item.hash = hash;
+        item.next = buckets[hash % buckets.length];
+        immutable bool checkLoadFactor = item.next !is null;
+        buckets[hash % buckets.length] = item;
         items ~= item;
-        buckets[hash % buckets.length] ~= item;
+        if (checkLoadFactor && (cast(float) items.length / cast(float) buckets.length) > 0.75)
+            rehash();
         return item.index;
     }
 
     const(Item)* find(const(ubyte)[] bytes, uint hash) pure nothrow const @safe
     {
         immutable size_t index = hash % buckets.length;
-        foreach (item; buckets[index])
+        for (const(Item)* item = buckets[index]; item !is null; item = item.next)
         {
             if (item.hash == hash && bytes.equal(item.str))
                 return item;
@@ -477,35 +577,27 @@ private:
         return null;
     }
 
-    string allocate(const(ubyte)[] bytes) pure nothrow @trusted
-    out (retVal)
-    {
-        assert (retVal == bytes);
-    }
-    body
+    ubyte[] allocate(size_t byteCount) pure nothrow @trusted
     {
         import core.memory;
-        if (bytes.length > (pageSize / 4))
+        if (byteCount > (blockSize / 4))
         {
-            ubyte* memory = cast(ubyte*) GC.malloc(bytes.length, GC.BlkAttr.NO_SCAN);
-            memory[0 .. bytes.length] = bytes[];
-            return cast(string) memory[0 .. bytes.length];
+            ubyte* mem = cast(ubyte*) GC.malloc(byteCount, GC.BlkAttr.NO_SCAN);
+            return mem[0 .. byteCount];
         }
         foreach (ref block; blocks)
         {
-            immutable size_t endIndex = block.used + bytes.length;
-            if (endIndex > block.bytes.length)
+            immutable size_t oldUsed = block.used;
+            immutable size_t end = oldUsed + byteCount;
+            if (end > block.bytes.length)
                 continue;
-            block.bytes[block.used .. endIndex] = bytes[];
-            string slice = cast(string) block.bytes[block.used .. endIndex];
-            block.used = endIndex;
-            return slice;
+            block.used = end;
+            return block.bytes[oldUsed .. end];
         }
-        blocks.length = blocks.length + 1;
-        blocks[$ - 1].bytes = (cast(ubyte*) GC.malloc(pageSize, GC.BlkAttr.NO_SCAN))[0 .. pageSize];
-        blocks[$ - 1].bytes[0 .. bytes.length] = bytes[];
-        blocks[$ - 1].used = bytes.length;
-        return cast(string) blocks[$ - 1].bytes[0 .. bytes.length];
+        blocks ~= Block(
+            (cast(ubyte*) GC.malloc(blockSize, GC.BlkAttr.NO_SCAN))[0 .. blockSize],
+            byteCount);
+        return blocks[$ - 1].bytes[0 .. byteCount];
     }
 
     static uint hashBytes(const(ubyte)[] data) pure nothrow @safe
@@ -524,6 +616,7 @@ private:
         size_t index;
         string str;
         uint hash;
+        Item* next;
     }
 
     static struct Block
@@ -532,10 +625,9 @@ private:
         size_t used;
     }
 
-    static enum pageSize = 4096 * 1024;
-    static enum bucketCount = 2048;
+    static enum blockSize = 1024 * 16;
 
-    static enum uint[] sbox = [
+    public static immutable uint[] sbox = [
         0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53,
         0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982,
         0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56,
@@ -603,6 +695,8 @@ private:
     ];
 
     Item*[] items;
-    Item*[][bucketCount] buckets;
+    Item*[] buckets;
     Block[] blocks;
+    size_t memoryRequested;
+    uint rehashCount;
 }
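
Taken together, the StringCache changes above replace the fixed Item*[][2048]
bucket table with singly linked chains through Item.next, double the bucket
array whenever the load factor passes 0.75, and carve both Item nodes and
string bytes out of shared 16 KiB blocks instead of GC-allocating each entry.
A hypothetical usage sketch of the reworked interface:

    // Default construction is disabled; the bucket count is now explicit.
    StringCache* cache = new StringCache(StringCache.defaultBucketCount);
    string a = cache.cacheGet(cast(const(ubyte)[]) "identifier");
    string b = cache.cacheGet(cast(const(ubyte)[]) "identifier");
    assert (a is b);    // both point at the same interned bytes
    cache.printStats(); // load factor, block memory, rehash count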