Merge pull request #34 from blackwhale/more-mojo

More flexible hash table with rehashing
This commit is contained in:
Hackerpilot 2013-03-10 00:10:28 -08:00
commit ae604fd559
3 changed files with 126 additions and 75 deletions

View File

@ -1,5 +1,5 @@
dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner-dmd dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner-dmd
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest #dmd *.d std/d/*.d -g -m64 -w -wi -ofdscanner -unittest
ldc2 -O3 *.d std/d/*.d -of=dscanner-ldc -release -m64 ldc2 -O3 *.d std/d/*.d -of=dscanner-ldc -release -m64
#ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g #ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
/opt/gdc/bin/gdc -O3 -odscanner-gdc -fno-bounds-check -frelease -m64 *.d std/d/*.d /opt/gdc/bin/gdc -O3 -odscanner-gdc -fno-bounds-check -frelease -m64 *.d std/d/*.d

View File

@ -163,19 +163,19 @@ class AssignExpression
{ {
assert ( assert (
operator == TokenType.assign operator == TokenType.assign
|| operator == TokenType.plusEqual || operator == TokenType.plusEquals
|| operator == TokenType.minusEqual || operator == TokenType.minusEquals
|| operator == TokenType.mulEqual || operator == TokenType.mulEquals
|| operator == TokenType.divEqual || operator == TokenType.divEquals
|| operator == TokenType.modEqual || operator == TokenType.modEquals
|| operator == TokenType.bitAndEqual || operator == TokenType.bitAndEquals
|| operator == TokenType.bitOrEqual || operator == TokenType.bitOrEquals
|| operator == TokenType.xorEqual || operator == TokenType.xorEquals
|| operator == TokenType.catEqual || operator == TokenType.catEquals
|| operator == TokenType.shiftLeftEqual || operator == TokenType.shiftLeftEqual
|| operator == TokenType.shiftRightEqual || operator == TokenType.shiftRightEqual
|| operator == TokenType.unsignedShiftRightEqual || operator == TokenType.unsignedShiftRightEqual
|| operator == TokenType.powEqual || operator == TokenType.powEquals
); );
} }
} }

View File

@ -102,7 +102,7 @@
* *
* Copyright: Brian Schott 2013 * Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0) * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott, Dmitry Olshansky * Authors: Brian Schott, Dmitry Olshansky
* Source: $(PHOBOSSRC std/d/_lexer.d) * Source: $(PHOBOSSRC std/d/_lexer.d)
*/ */
@ -358,39 +358,6 @@ struct TokenRange(LexSrc)
return r; return r;
} }
/**
 * Foreach support (token only).
 *
 * Calls dg with the current front token, then pops it, until the range
 * is empty or dg returns a non-zero value.
 *
 * Returns: the first non-zero value returned by dg, or 0 if the range
 * was exhausted (or was already empty).
 */
int opApply(int delegate(Token) dg)
{
    for (; !empty; popFront())
    {
        const int status = dg(front);
        // a non-zero status stops the iteration; the current token is
        // NOT popped in that case
        if (status)
            return status;
    }
    return 0;
}
/**
 * Foreach support (index and token).
 *
 * Calls dg with a zero-based running index and the current front token,
 * then pops the token, until the range is empty or dg returns a non-zero
 * value.
 *
 * Params:
 *     dg = loop body; receives the index of the token counted from the
 *          start of this iteration, and the token itself
 *
 * Returns: the first non-zero value returned by dg, or 0 if the range
 * was exhausted (or was already empty).
 */
int opApply(int delegate(size_t, Token) dg)
{
    int result = 0;
    // size_t matches the delegate's index parameter type
    size_t i = 0;
    while (!empty)
    {
        result = dg(i, front);
        if (result)
            break;
        popFront();
        // advance the index together with the range; previously the index
        // was never incremented and every call received 0
        ++i;
    }
    return result;
}
/** /**
* Removes the current token from the range * Removes the current token from the range
*/ */
@ -624,7 +591,6 @@ L_advance:
current.value = getTokenValue(current.type); current.value = getTokenValue(current.type);
if (current.value is null) if (current.value is null)
setTokenValue(); setTokenValue();
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof) if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
{ {
_empty = true; _empty = true;
@ -1163,7 +1129,7 @@ L_advance:
void lexString() void lexString()
in in
{ {
assert (src.front == '"'); //assert (src.front == '"');
} }
body body
{ {
@ -1860,13 +1826,14 @@ L_advance:
this(LexSrc lex, LexerConfig cfg) this(LexSrc lex, LexerConfig cfg)
{ {
src = move(lex); // lex is rhs src = move(lex); // lex is r-value
lineNumber = 1; lineNumber = 1;
column = 0; column = 0;
_empty = false; _empty = false;
config = move(cfg); config = move(cfg); // ditto with cfg
cache = StringCache(initialTableSize);
} }
enum initialTableSize = 2048;
Token current; Token current;
uint lineNumber; uint lineNumber;
uint column; uint column;
@ -3041,31 +3008,40 @@ string generateCaseTrie(string[] args ...)
struct StringCache struct StringCache
{ {
/**
 * Creates a cache whose bucket table initially holds startSize entries.
 *
 * Params:
 *     startSize = initial number of buckets; must be a non-zero power of
 *                 two because buckets are selected by masking the hash
 *                 with (index.length - 1)
 */
this(size_t startSize)
{
    // (0 & (0 - 1)) == 0 also holds, so zero has to be rejected explicitly:
    // an empty table would turn the bucket mask into all ones
    assert(startSize != 0 && (startSize & (startSize - 1)) == 0,
        "StringCache start size must be a non-zero power of two");
    index = new Slot*[startSize];
}
string get(R)(R range) string get(R)(R range)
if(isRandomAccessRange!R if(isRandomAccessRange!R
&& is(Unqual!(ElementType!R) : const(ubyte))) && is(Unqual!(ElementType!R) : const(ubyte)))
{ {
uint h = hash(range); uint h = hash(range);
uint bucket = h % mapSize; uint bucket = h & (index.length-1);
Slot *s = &index[bucket]; Slot *s = index[bucket];
//1st slot not yet initialized? if(s == null)
if (s.value.ptr == null)
{ {
*s = Slot(putIntoCache(range), null, h); string str = putIntoCache(range);
return s.value; index[bucket] = allocateSlot(str, h);
uniqueSlots++;
return str;
} }
Slot* insSlot = s;
for(;;) for(;;)
{ {
if(s.hash == h && s.value.equal(range)) if(s.hash == h && s.value.equal(range))
return s.value; return s.value;
insSlot = s; if(s.next == null) break;
s = s.next; s = s.next;
if(s == null) break;
} }
string str = putIntoCache(range); string str = putIntoCache(range);
insertIntoSlot(insSlot, str, h); s.next = allocateSlot(str, h);
uniqueSlots++;
// had at least 1 item in this bucket
// and inserted another one - check load factor
if(uniqueSlots*loadDenom > index.length*loadQuot)
rehash();
return str; return str;
} }
@ -3082,8 +3058,6 @@ private:
return hash; return hash;
} }
enum mapSize = 2048;
struct Slot struct Slot
{ {
string value; string value;
@ -3091,21 +3065,98 @@ private:
uint hash; uint hash;
}; };
void insertIntoSlot(Slot* tgt, string val, uint hash) void printLoadFactor()
{
size_t cnt = 0, maxChain = 0;
foreach(Slot* s; index)
{
size_t chain = 0;
for(Slot* p = s; p; p = p.next)
{
chain++;
}
maxChain = max(chain, maxChain);
cnt += chain;
}
import std.stdio;
assert(cnt == uniqueSlots);
writefln("Load factor: %.3f; max bucket %d",
cast(double)cnt/index.length,
maxChain);
}
/**
 * Doubles the number of buckets and redistributes the existing slots.
 *
 * Only the first oldLen buckets are walked. A slot whose hash has the
 * oldLen bit set is unlinked from bucket i and pushed onto the front of
 * bucket i + oldLen; every other slot stays in place.
 */
void rehash()
{
    //writefln("BEFORE (size = %d):", index.length);
    //printLoadFactor();
    immutable size_t oldLen = index.length;
    index.length = oldLen * 2;
    foreach (i; 0 .. oldLen)
    {
        // link refers to whichever pointer leads to the slot under
        // inspection: the bucket head at first, later some slot's next
        Slot** link = &index[i];
        while (*link)
        {
            Slot* slot = *link;
            if (slot.hash & oldLen)
            {
                // extra bit set - splice the slot out, then move it
                *link = slot.next;
                insertIntoBucket(slot, i + oldLen);
            }
            else
            {
                link = &slot.next;
            }
        }
    }
    //writefln("AFTER (size = %d):", index.length);
    //printLoadFactor();
}
/**
 * Unlinks the slot cur from a chain in which prev is its predecessor.
 *
 * Params:
 *     cur  = slot to remove; passed by ref and advanced to the removed
 *            slot's successor before returning
 *     prev = predecessor of cur; dereferenced unconditionally, so it
 *            must not be null
 *
 * Returns: the removed slot. Its own next pointer is left untouched.
 */
static Slot* removeLink(ref Slot* cur, Slot* prev)
{
// bypass cur in the chain
prev.next = cur.next;
// remember the removed slot before cur is advanced
Slot* r = cur;
// NOTE(review): the statement order matters if cur ever aliases prev.next
cur = cur.next;
return r;
}
/**
 * Pushes a slot onto the front of a bucket's chain.
 *
 * Params:
 *     what   = slot to insert; its next pointer is overwritten with the
 *              bucket's current head
 *     bucket = position in index of the bucket that receives the slot
 */
void insertIntoBucket(Slot* what, size_t bucket)
{
    // the old head (possibly null) becomes the successor of the new head
    what.next = index[bucket];
    index[bucket] = what;
}
Slot* allocateSlot(string val, uint hash)
{ {
auto slice = allocateInCache(Slot.sizeof); auto slice = allocateInCache(Slot.sizeof);
auto newSlot = cast(Slot*)slice.ptr; auto newSlot = cast(Slot*)slice.ptr;
*newSlot = Slot(val, null, hash); *newSlot = Slot(val, null, hash);
tgt.next = newSlot; return newSlot;
} }
Slot[mapSize] index; Slot*[] index;
size_t uniqueSlots;
enum loadQuot = 2, loadDenom = 3;
// leave some slack for allocators/GC meta-data // leave some slack for allocators/GC meta-data
enum chunkSize = 16*1024 - size_t.sizeof*8; enum chunkSize = 16*1024 - size_t.sizeof*8;
ubyte*[] chunkS; ubyte*[] chunkS;
size_t next = chunkSize; size_t next = chunkSize;
//TODO: add aligned variant that allocates at word boundary
ubyte[] allocateInCache(size_t size) ubyte[] allocateInCache(size_t size)
{ {
import core.memory; import core.memory;
@ -3115,11 +3166,11 @@ private:
if(size> chunkSize/4) if(size> chunkSize/4)
{ {
ubyte* p = cast(ubyte*)GC.malloc(size, ubyte* p = cast(ubyte*)GC.malloc(size,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR); GC.BlkAttr.NO_SCAN);
return p[0..size]; return p[0..size];
} }
chunkS ~= cast(ubyte*)GC.malloc(chunkSize, chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR); GC.BlkAttr.NO_SCAN);
next = 0; next = 0;
} }
auto slice = chunkS[$-1][next..next+size]; auto slice = chunkS[$-1][next..next+size];