Merge pull request #34 from blackwhale/more-mojo
More flexible hash table with rehashing
This commit is contained in:
commit
ae604fd559
2
build.sh
2
build.sh
|
@ -1,5 +1,5 @@
|
||||||
dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner-dmd
|
dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner-dmd
|
||||||
#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest
|
#dmd *.d std/d/*.d -g -m64 -w -wi -ofdscanner -unittest
|
||||||
ldc2 -O3 *.d std/d/*.d -of=dscanner-ldc -release -m64
|
ldc2 -O3 *.d std/d/*.d -of=dscanner-ldc -release -m64
|
||||||
#ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
|
#ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
|
||||||
/opt/gdc/bin/gdc -O3 -odscanner-gdc -fno-bounds-check -frelease -m64 *.d std/d/*.d
|
/opt/gdc/bin/gdc -O3 -odscanner-gdc -fno-bounds-check -frelease -m64 *.d std/d/*.d
|
||||||
|
|
20
std/d/ast.d
20
std/d/ast.d
|
@ -163,19 +163,19 @@ class AssignExpression
|
||||||
{
|
{
|
||||||
assert (
|
assert (
|
||||||
operator == TokenType.assign
|
operator == TokenType.assign
|
||||||
|| operator == TokenType.plusEqual
|
|| operator == TokenType.plusEquals
|
||||||
|| operator == TokenType.minusEqual
|
|| operator == TokenType.minusEquals
|
||||||
|| operator == TokenType.mulEqual
|
|| operator == TokenType.mulEquals
|
||||||
|| operator == TokenType.divEqual
|
|| operator == TokenType.divEquals
|
||||||
|| operator == TokenType.modEqual
|
|| operator == TokenType.modEquals
|
||||||
|| operator == TokenType.bitAndEqual
|
|| operator == TokenType.bitAndEquals
|
||||||
|| operator == TokenType.bitOrEqual
|
|| operator == TokenType.bitOrEquals
|
||||||
|| operator == TokenType.xorEqual
|
|| operator == TokenType.xorEquals
|
||||||
|| operator == TokenType.catEqual
|
|| operator == TokenType.catEquals
|
||||||
|| operator == TokenType.shiftLeftEqual
|
|| operator == TokenType.shiftLeftEqual
|
||||||
|| operator == TokenType.shiftRightEqual
|
|| operator == TokenType.shiftRightEqual
|
||||||
|| operator == TokenType.unsignedShiftRightEqual
|
|| operator == TokenType.unsignedShiftRightEqual
|
||||||
|| operator == TokenType.powEqual
|
|| operator == TokenType.powEquals
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
167
std/d/lexer.d
167
std/d/lexer.d
|
@ -102,7 +102,7 @@
|
||||||
*
|
*
|
||||||
* Copyright: Brian Schott 2013
|
* Copyright: Brian Schott 2013
|
||||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
|
||||||
* Authors: Brian Schott, Dmitry Olshansky
|
* Authors: Brian Schott, Dmitry Olshansky
|
||||||
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
* Source: $(PHOBOSSRC std/d/_lexer.d)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -358,39 +358,6 @@ struct TokenRange(LexSrc)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Foreach operation
|
|
||||||
*/
|
|
||||||
int opApply(int delegate(Token) dg)
|
|
||||||
{
|
|
||||||
int result = 0;
|
|
||||||
while (!empty)
|
|
||||||
{
|
|
||||||
result = dg(front);
|
|
||||||
if (result)
|
|
||||||
break;
|
|
||||||
popFront();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Foreach operation
|
|
||||||
*/
|
|
||||||
int opApply(int delegate(size_t, Token) dg)
|
|
||||||
{
|
|
||||||
int result = 0;
|
|
||||||
int i = 0;
|
|
||||||
while (!empty)
|
|
||||||
{
|
|
||||||
result = dg(i, front);
|
|
||||||
if (result)
|
|
||||||
break;
|
|
||||||
popFront();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes the current token from the range
|
* Removes the current token from the range
|
||||||
*/
|
*/
|
||||||
|
@ -624,7 +591,6 @@ L_advance:
|
||||||
current.value = getTokenValue(current.type);
|
current.value = getTokenValue(current.type);
|
||||||
if (current.value is null)
|
if (current.value is null)
|
||||||
setTokenValue();
|
setTokenValue();
|
||||||
|
|
||||||
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
|
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
|
||||||
{
|
{
|
||||||
_empty = true;
|
_empty = true;
|
||||||
|
@ -1163,7 +1129,7 @@ L_advance:
|
||||||
void lexString()
|
void lexString()
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
assert (src.front == '"');
|
//assert (src.front == '"');
|
||||||
}
|
}
|
||||||
body
|
body
|
||||||
{
|
{
|
||||||
|
@ -1860,13 +1826,14 @@ L_advance:
|
||||||
|
|
||||||
this(LexSrc lex, LexerConfig cfg)
|
this(LexSrc lex, LexerConfig cfg)
|
||||||
{
|
{
|
||||||
src = move(lex); // lex is rhs
|
src = move(lex); // lex is r-value
|
||||||
lineNumber = 1;
|
lineNumber = 1;
|
||||||
column = 0;
|
column = 0;
|
||||||
_empty = false;
|
_empty = false;
|
||||||
config = move(cfg);
|
config = move(cfg); // ditto with cfg
|
||||||
|
cache = StringCache(initialTableSize);
|
||||||
}
|
}
|
||||||
|
enum initialTableSize = 2048;
|
||||||
Token current;
|
Token current;
|
||||||
uint lineNumber;
|
uint lineNumber;
|
||||||
uint column;
|
uint column;
|
||||||
|
@ -3041,31 +3008,40 @@ string generateCaseTrie(string[] args ...)
|
||||||
|
|
||||||
struct StringCache
|
struct StringCache
|
||||||
{
|
{
|
||||||
|
this(size_t startSize)
|
||||||
|
{
|
||||||
|
assert((startSize & (startSize-1)) == 0);
|
||||||
|
index = new Slot*[startSize];
|
||||||
|
}
|
||||||
|
|
||||||
string get(R)(R range)
|
string get(R)(R range)
|
||||||
if(isRandomAccessRange!R
|
if(isRandomAccessRange!R
|
||||||
&& is(Unqual!(ElementType!R) : const(ubyte)))
|
&& is(Unqual!(ElementType!R) : const(ubyte)))
|
||||||
{
|
{
|
||||||
|
|
||||||
uint h = hash(range);
|
uint h = hash(range);
|
||||||
uint bucket = h % mapSize;
|
uint bucket = h & (index.length-1);
|
||||||
Slot *s = &index[bucket];
|
Slot *s = index[bucket];
|
||||||
//1st slot not yet initialized?
|
if(s == null)
|
||||||
if (s.value.ptr == null)
|
|
||||||
{
|
{
|
||||||
*s = Slot(putIntoCache(range), null, h);
|
string str = putIntoCache(range);
|
||||||
return s.value;
|
index[bucket] = allocateSlot(str, h);
|
||||||
|
uniqueSlots++;
|
||||||
|
return str;
|
||||||
}
|
}
|
||||||
Slot* insSlot = s;
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if(s.hash == h && s.value.equal(range))
|
if(s.hash == h && s.value.equal(range))
|
||||||
return s.value;
|
return s.value;
|
||||||
insSlot = s;
|
if(s.next == null) break;
|
||||||
s = s.next;
|
s = s.next;
|
||||||
if(s == null) break;
|
|
||||||
}
|
}
|
||||||
string str = putIntoCache(range);
|
string str = putIntoCache(range);
|
||||||
insertIntoSlot(insSlot, str, h);
|
s.next = allocateSlot(str, h);
|
||||||
|
uniqueSlots++;
|
||||||
|
// had at least 1 item in this bucket
|
||||||
|
// and inserted another one - check load factor
|
||||||
|
if(uniqueSlots*loadDenom > index.length*loadQuot)
|
||||||
|
rehash();
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3082,8 +3058,6 @@ private:
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum mapSize = 2048;
|
|
||||||
|
|
||||||
struct Slot
|
struct Slot
|
||||||
{
|
{
|
||||||
string value;
|
string value;
|
||||||
|
@ -3091,21 +3065,98 @@ private:
|
||||||
uint hash;
|
uint hash;
|
||||||
};
|
};
|
||||||
|
|
||||||
void insertIntoSlot(Slot* tgt, string val, uint hash)
|
void printLoadFactor()
|
||||||
|
{
|
||||||
|
size_t cnt = 0, maxChain = 0;
|
||||||
|
foreach(Slot* s; index)
|
||||||
|
{
|
||||||
|
size_t chain = 0;
|
||||||
|
for(Slot* p = s; p; p = p.next)
|
||||||
|
{
|
||||||
|
chain++;
|
||||||
|
}
|
||||||
|
maxChain = max(chain, maxChain);
|
||||||
|
cnt += chain;
|
||||||
|
}
|
||||||
|
import std.stdio;
|
||||||
|
assert(cnt == uniqueSlots);
|
||||||
|
writefln("Load factor: %.3f; max bucket %d",
|
||||||
|
cast(double)cnt/index.length,
|
||||||
|
maxChain);
|
||||||
|
}
|
||||||
|
|
||||||
|
void rehash()
|
||||||
|
{
|
||||||
|
//writefln("BEFORE (size = %d):", index.length);
|
||||||
|
//printLoadFactor();
|
||||||
|
size_t oldLen = index.length;
|
||||||
|
index.length *= 2;
|
||||||
|
for (size_t i = 0; i < oldLen; i++)
|
||||||
|
{
|
||||||
|
Slot* cur = index[i], prev;
|
||||||
|
while(cur)
|
||||||
|
{
|
||||||
|
//has extra bit set - move it out
|
||||||
|
if(cur.hash & oldLen)
|
||||||
|
{
|
||||||
|
if(prev == null)
|
||||||
|
{
|
||||||
|
Slot* r = cur;
|
||||||
|
index[i] = cur.next;
|
||||||
|
cur = cur.next;
|
||||||
|
insertIntoBucket(r, i + oldLen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Slot* r = removeLink(cur, prev);
|
||||||
|
insertIntoBucket(r, i + oldLen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
prev = cur;
|
||||||
|
cur = cur.next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//writefln("AFTER (size = %d):", index.length);
|
||||||
|
//printLoadFactor();
|
||||||
|
}
|
||||||
|
|
||||||
|
static Slot* removeLink(ref Slot* cur, Slot* prev)
|
||||||
|
{
|
||||||
|
prev.next = cur.next;
|
||||||
|
Slot* r = cur;
|
||||||
|
cur = cur.next;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
//insert at front of bucket
|
||||||
|
void insertIntoBucket(Slot* what, size_t bucket)
|
||||||
|
{
|
||||||
|
what.next = null;
|
||||||
|
Slot* p = index[bucket];
|
||||||
|
what.next = p;
|
||||||
|
index[bucket] = what;
|
||||||
|
}
|
||||||
|
|
||||||
|
Slot* allocateSlot(string val, uint hash)
|
||||||
{
|
{
|
||||||
auto slice = allocateInCache(Slot.sizeof);
|
auto slice = allocateInCache(Slot.sizeof);
|
||||||
auto newSlot = cast(Slot*)slice.ptr;
|
auto newSlot = cast(Slot*)slice.ptr;
|
||||||
*newSlot = Slot(val, null, hash);
|
*newSlot = Slot(val, null, hash);
|
||||||
tgt.next = newSlot;
|
return newSlot;
|
||||||
}
|
}
|
||||||
|
|
||||||
Slot[mapSize] index;
|
Slot*[] index;
|
||||||
|
size_t uniqueSlots;
|
||||||
|
enum loadQuot = 2, loadDenom = 3;
|
||||||
|
|
||||||
// leave some slack for allocators/GC meta-data
|
// leave some slack for allocators/GC meta-data
|
||||||
enum chunkSize = 16*1024 - size_t.sizeof*8;
|
enum chunkSize = 16*1024 - size_t.sizeof*8;
|
||||||
ubyte*[] chunkS;
|
ubyte*[] chunkS;
|
||||||
size_t next = chunkSize;
|
size_t next = chunkSize;
|
||||||
|
//TODO: add aligned variant that allocates at word boundary
|
||||||
ubyte[] allocateInCache(size_t size)
|
ubyte[] allocateInCache(size_t size)
|
||||||
{
|
{
|
||||||
import core.memory;
|
import core.memory;
|
||||||
|
@ -3115,11 +3166,11 @@ private:
|
||||||
if(size> chunkSize/4)
|
if(size> chunkSize/4)
|
||||||
{
|
{
|
||||||
ubyte* p = cast(ubyte*)GC.malloc(size,
|
ubyte* p = cast(ubyte*)GC.malloc(size,
|
||||||
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
|
GC.BlkAttr.NO_SCAN);
|
||||||
return p[0..size];
|
return p[0..size];
|
||||||
}
|
}
|
||||||
chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
|
chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
|
||||||
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
|
GC.BlkAttr.NO_SCAN);
|
||||||
next = 0;
|
next = 0;
|
||||||
}
|
}
|
||||||
auto slice = chunkS[$-1][next..next+size];
|
auto slice = chunkS[$-1][next..next+size];
|
||||||
|
|
Loading…
Reference in New Issue