Attempt to fix some issues with string interning

This commit is contained in:
Hackerpilot 2014-05-18 08:13:23 +00:00
parent 7a2d5f7e73
commit 19dc7c707f
4 changed files with 71 additions and 84 deletions

View File

@ -20,7 +20,7 @@ void printCtags(File output, string[] fileNames)
{ {
string[] tags; string[] tags;
LexerConfig config; LexerConfig config;
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount); StringCache* cache = new StringCache(StringCache.defaultBucketCount);
foreach (fileName; fileNames) foreach (fileName; fileNames)
{ {
File f = File(fileName); File f = File(fileName);

2
main.d
View File

@ -102,7 +102,7 @@ int run(string[] args)
return 1; return 1;
} }
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount); StringCache* cache = new StringCache(StringCache.defaultBucketCount);
if (tokenDump || highlight) if (tokenDump || highlight)
{ {

View File

@ -415,7 +415,7 @@ public struct DLexer
mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens, mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
keywords, pseudoTokenHandlers); keywords, pseudoTokenHandlers);
this(ubyte[] range, const LexerConfig config, shared(StringCache)* cache) this(ubyte[] range, const LexerConfig config, StringCache* cache)
{ {
this.range = LexerRange(range); this.range = LexerRange(range);
this.config = config; this.config = config;
@ -1543,24 +1543,24 @@ public struct DLexer
} }
Message[] messages; Message[] messages;
shared(StringCache)* cache; StringCache* cache;
LexerConfig config; LexerConfig config;
} }
// Convenience overload: builds a DLexer over `range` using a
// default-initialized LexerConfig and a newly allocated StringCache with
// StringCache.defaultBucketCount buckets.
// Side-by-side diff: the left column is the pre-commit form taking a
// shared(StringCache)*, the right column the post-commit non-shared form.
public auto byToken(ubyte[] range) public auto byToken(ubyte[] range)
{ {
LexerConfig config; LexerConfig config;
// A new cache is created on every call of this overload.
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount); StringCache* cache = new StringCache(StringCache.defaultBucketCount);
return DLexer(range, config, cache); return DLexer(range, config, cache);
} }
// Convenience overload: builds a DLexer over `range` using a
// default-initialized LexerConfig and the caller-supplied string cache.
// Side-by-side diff: left column takes shared(StringCache)*, right column
// takes a plain StringCache*.
public auto byToken(ubyte[] range, shared(StringCache)* cache) public auto byToken(ubyte[] range, StringCache* cache)
{ {
LexerConfig config; LexerConfig config;
return DLexer(range, config, cache); return DLexer(range, config, cache);
} }
// Most general overload: forwards `range`, `config` and `cache` unchanged to
// the DLexer constructor.
// Side-by-side diff: left column takes shared(StringCache)*, right column
// takes a plain StringCache*.
public auto byToken(ubyte[] range, const LexerConfig config, shared(StringCache)* cache) public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
{ {
return DLexer(range, config, cache); return DLexer(range, config, cache);
} }

View File

@ -773,34 +773,33 @@ struct LexerRange
size_t line; size_t line;
} }
shared struct StringCache struct StringCache
{ {
import core.sync.mutex;
public: public:
@disable this(); @disable this();
/** /**
* Params: bucketCount = the initial number of buckets. * Params: bucketCount = the initial number of buckets. Must be a
* power of two
*/ */
// Constructs the cache with `bucketCount` empty buckets.
// Left column (pre-commit): the bucket array is created with `new` and the
// `allocating` flag is cleared.
// Right column (post-commit): the bucket array is obtained from calloc and
// sliced to `bucketCount` elements. The bucket index is computed elsewhere in
// this struct as `hash & (buckets.length - 1)`, which only reaches every
// bucket when the length is a power of two.
// NOTE(review): the calloc declaration at the end of this file takes a single
// size_t, but the C standard calloc takes (count, size). Calling it with one
// argument leaves the second argument unspecified -- confirm and fix the
// declaration/call together.
// NOTE(review): the calloc result is not checked for null before slicing.
this(size_t bucketCount) this(size_t bucketCount)
{ {
buckets = cast(shared) new Node*[bucketCount]; buckets = (cast(Node**) calloc((void*).sizeof * bucketCount))[0 .. bucketCount];
allocating = false;
} }
// Destructor: walks the linked list of blocks starting at rootBlock and
// frees each block's byte buffer. The right column (post-commit) also frees
// the Block struct itself, which that revision allocates with malloc.
// Afterwards rootBlock and buckets are reset.
// NOTE(review): in the right column the bucket array comes from calloc, Node
// structs come from malloc in _intern, and strings longer than blockSize / 4
// are malloc'd directly by allocate; none of those are freed here -- confirm
// whether that is intentional.
~this() ~this()
{ {
import core.memory; Block* current = rootBlock;
shared(Block)* current = rootBlock;
while (current !is null) while (current !is null)
{ {
// Remember the block being released and advance before freeing it.
shared(Block)* prev = current; Block* prev = current;
current = current.next; current = current.next;
free(cast(void*) prev.bytes.ptr); free(cast(void*) prev.bytes.ptr);
free(cast(void*) prev);
} }
rootBlock = null; rootBlock = null;
buckets = []; buckets = null;
} }
/** /**
@ -845,6 +844,13 @@ public:
body body
{ {
return _intern(str, hash); return _intern(str, hash);
// string s = _intern(str, hash);
// size_t* ptr = s in debugMap;
// if (ptr is null)
// debugMap[s] = cast(size_t) s.ptr;
// else
// assert (*ptr == cast(size_t) s.ptr);
// return s;
} }
/** /**
@ -864,7 +870,10 @@ public:
*/ */
static enum defaultBucketCount = 2048; static enum defaultBucketCount = 2048;
// Left column (pre-commit): `allocated` is a public field.
// Right column (post-commit): `allocated` becomes a read-only property that
// returns the private counter _allocated, which _intern increases by the
// length of each newly stored string.
size_t allocated; size_t allocated() pure nothrow @safe @property
{
return _allocated;
}
private: private:
@ -872,44 +881,30 @@ private:
{ {
if (bytes is null || bytes.length == 0) if (bytes is null || bytes.length == 0)
return ""; return "";
import core.atomic; immutable size_t index = hash & (buckets.length - 1);
shared ubyte[] mem; Node* s = find(bytes, hash);
shared(Node*)* oldBucketRoot = &buckets[hash % buckets.length]; if (s !is null)
while (true) return cast(string) s.str;
{ _allocated += bytes.length;
bool found; ubyte[] mem = allocate(bytes.length);
shared(Node)* s = find(bytes, hash, found); mem[] = bytes[];
shared(Node)* n = s is null ? null : s.next; Node* node = cast(Node*) malloc(Node.sizeof);
if (found) node.str = mem;
return cast(string) s.str; node.hash = hash;
if (mem.length == 0) node.next = buckets[index];
{ buckets[index] = node;
atomicOp!"+="(allocated, bytes.length);
mem = allocate(bytes.length);
mem[] = bytes[];
}
shared(Node)* node = new shared Node(mem, hash, null);
if (s is null && cas(oldBucketRoot, *oldBucketRoot, node))
break;
node.next = s.next;
if (cas(&s.next, n, node))
break;
}
return cast(string) mem; return cast(string) mem;
} }
shared(Node)* find(const(ubyte)[] bytes, uint hash, ref bool found) pure nothrow @trusted Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
{ {
import std.algorithm; import std.algorithm;
immutable size_t index = hash % buckets.length; immutable size_t index = hash & (buckets.length - 1);
shared(Node)* node = buckets[index]; Node* node = buckets[index];
while (node !is null) while (node !is null)
{ {
if (node.hash >= hash && bytes.equal(cast(ubyte[]) node.str)) if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
{
found = true;
return node; return node;
}
node = node.next; node = node.next;
} }
return node; return node;
@ -932,7 +927,7 @@ private:
return hash; return hash;
} }
shared(ubyte[]) allocate(immutable size_t numBytes) pure nothrow @trusted ubyte[] allocate(size_t numBytes) pure nothrow @trusted
in in
{ {
assert (numBytes != 0); assert (numBytes != 0);
@ -943,54 +938,44 @@ private:
} }
body body
{ {
import core.atomic;
import core.memory;
if (numBytes > (blockSize / 4)) if (numBytes > (blockSize / 4))
return cast(shared) (cast(ubyte*) malloc(numBytes))[0 .. numBytes]; return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
shared(Block)* r = rootBlock; Block* r = rootBlock;
while (true) size_t i = 0;
while (i <= 3 && r !is null)
{ {
while (r !is null)
immutable size_t available = r.bytes.length;
immutable size_t oldUsed = r.used;
immutable size_t newUsed = oldUsed + numBytes;
if (newUsed <= available)
{ {
while (true) r.used = newUsed;
{ return r.bytes[oldUsed .. newUsed];
immutable size_t available = r.bytes.length;
immutable size_t oldUsed = atomicLoad(r.used);
immutable size_t newUsed = oldUsed + numBytes;
if (newUsed > available)
break;
if (cas(&r.used, oldUsed, newUsed))
return r.bytes[oldUsed .. newUsed];
}
r = r.next;
}
if (cas(&allocating, false, true))
{
shared(Block)* b = new shared Block(
cast(shared) (cast(ubyte*) malloc(blockSize))[0 .. blockSize],
numBytes,
r);
atomicStore(rootBlock, b);
atomicStore(allocating, false);
r = rootBlock;
return b.bytes[0 .. numBytes];
} }
i++;
r = r.next;
} }
assert (0); Block* b = cast(Block*) malloc(Block.sizeof);
b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
b.used = numBytes;
b.next = rootBlock;
rootBlock = b;
return b.bytes[0 .. numBytes];
} }
// One entry in a hash bucket's singly linked chain.
// Left column declares the struct `shared`; the right column drops `shared`.
static shared struct Node static struct Node
{ {
// The stored copy of the interned string's bytes.
ubyte[] str; ubyte[] str;
// Hash of `str`; find compares it against the lookup hash.
uint hash; uint hash;
// Next node in the same bucket, or null at the end of the chain.
shared(Node)* next; Node* next;
} }
// A chunk of malloc'd memory from which allocate carves out string storage.
// Left column declares the struct `shared`; the right column drops `shared`.
static shared struct Block static struct Block
{ {
// The block's backing buffer (blockSize bytes when created by allocate).
ubyte[] bytes; ubyte[] bytes;
// Number of bytes of `bytes` already handed out.
size_t used; size_t used;
// Next block in the list that starts at rootBlock.
shared(Block)* next; Block* next;
} }
static enum blockSize = 1024 * 16; static enum blockSize = 1024 * 16;
@ -1062,10 +1047,12 @@ private:
0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A, 0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
]; ];
shared bool allocating; // deprecated size_t[string] debugMap;
shared(Node)*[] buckets; size_t _allocated;
shared(Block)* rootBlock; Node*[] buckets;
Block* rootBlock;
} }
// C allocator bindings used by StringCache (calloc is added by the right,
// post-commit column only).
// NOTE(review): the C standard prototype is
// `void* calloc(size_t count, size_t size)`; this one-parameter declaration
// does not match it, so the single-argument call in the StringCache
// constructor passes an unspecified second argument -- confirm and correct.
// NOTE(review): these functions are declared `pure` although they mutate
// global allocator state -- presumably so they can be called from the pure
// member functions; verify this is intended.
private extern(C) void* calloc(size_t) nothrow pure;
private extern(C) void* malloc(size_t) nothrow pure; private extern(C) void* malloc(size_t) nothrow pure;
private extern(C) void free(void*) nothrow pure; private extern(C) void free(void*) nothrow pure;