Attempt to fix some issues with string interning

This commit is contained in:
Hackerpilot 2014-05-18 08:13:23 +00:00
parent 7a2d5f7e73
commit 19dc7c707f
4 changed files with 71 additions and 84 deletions

View File

@ -20,7 +20,7 @@ void printCtags(File output, string[] fileNames)
{
string[] tags;
LexerConfig config;
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
StringCache* cache = new StringCache(StringCache.defaultBucketCount);
foreach (fileName; fileNames)
{
File f = File(fileName);

2
main.d
View File

@ -102,7 +102,7 @@ int run(string[] args)
return 1;
}
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
StringCache* cache = new StringCache(StringCache.defaultBucketCount);
if (tokenDump || highlight)
{

View File

@ -415,7 +415,7 @@ public struct DLexer
mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
keywords, pseudoTokenHandlers);
this(ubyte[] range, const LexerConfig config, shared(StringCache)* cache)
this(ubyte[] range, const LexerConfig config, StringCache* cache)
{
this.range = LexerRange(range);
this.config = config;
@ -1543,24 +1543,24 @@ public struct DLexer
}
Message[] messages;
shared(StringCache)* cache;
StringCache* cache;
LexerConfig config;
}
public auto byToken(ubyte[] range)
{
LexerConfig config;
shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
StringCache* cache = new StringCache(StringCache.defaultBucketCount);
return DLexer(range, config, cache);
}
public auto byToken(ubyte[] range, shared(StringCache)* cache)
public auto byToken(ubyte[] range, StringCache* cache)
{
LexerConfig config;
return DLexer(range, config, cache);
}
public auto byToken(ubyte[] range, const LexerConfig config, shared(StringCache)* cache)
public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
{
return DLexer(range, config, cache);
}

View File

@ -773,34 +773,33 @@ struct LexerRange
size_t line;
}
shared struct StringCache
struct StringCache
{
import core.sync.mutex;
public:
@disable this();
/**
* Params: bucketCount = the initial number of buckets.
* Params: bucketCount = the initial number of buckets. Must be a
* power of two
*/
this(size_t bucketCount)
{
buckets = cast(shared) new Node*[bucketCount];
allocating = false;
buckets = (cast(Node**) calloc((void*).sizeof * bucketCount))[0 .. bucketCount];
}
~this()
{
import core.memory;
shared(Block)* current = rootBlock;
Block* current = rootBlock;
while (current !is null)
{
shared(Block)* prev = current;
Block* prev = current;
current = current.next;
free(cast(void*) prev.bytes.ptr);
free(cast(void*) prev);
}
rootBlock = null;
buckets = [];
buckets = null;
}
/**
@ -845,6 +844,13 @@ public:
body
{
return _intern(str, hash);
// string s = _intern(str, hash);
// size_t* ptr = s in debugMap;
// if (ptr is null)
// debugMap[s] = cast(size_t) s.ptr;
// else
// assert (*ptr == cast(size_t) s.ptr);
// return s;
}
/**
@ -864,7 +870,10 @@ public:
*/
static enum defaultBucketCount = 2048;
size_t allocated;
size_t allocated() pure nothrow @safe @property
{
return _allocated;
}
private:
@ -872,44 +881,30 @@ private:
{
if (bytes is null || bytes.length == 0)
return "";
import core.atomic;
shared ubyte[] mem;
shared(Node*)* oldBucketRoot = &buckets[hash % buckets.length];
while (true)
{
bool found;
shared(Node)* s = find(bytes, hash, found);
shared(Node)* n = s is null ? null : s.next;
if (found)
return cast(string) s.str;
if (mem.length == 0)
{
atomicOp!"+="(allocated, bytes.length);
mem = allocate(bytes.length);
mem[] = bytes[];
}
shared(Node)* node = new shared Node(mem, hash, null);
if (s is null && cas(oldBucketRoot, *oldBucketRoot, node))
break;
node.next = s.next;
if (cas(&s.next, n, node))
break;
}
immutable size_t index = hash & (buckets.length - 1);
Node* s = find(bytes, hash);
if (s !is null)
return cast(string) s.str;
_allocated += bytes.length;
ubyte[] mem = allocate(bytes.length);
mem[] = bytes[];
Node* node = cast(Node*) malloc(Node.sizeof);
node.str = mem;
node.hash = hash;
node.next = buckets[index];
buckets[index] = node;
return cast(string) mem;
}
shared(Node)* find(const(ubyte)[] bytes, uint hash, ref bool found) pure nothrow @trusted
Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
{
import std.algorithm;
immutable size_t index = hash % buckets.length;
shared(Node)* node = buckets[index];
immutable size_t index = hash & (buckets.length - 1);
Node* node = buckets[index];
while (node !is null)
{
if (node.hash >= hash && bytes.equal(cast(ubyte[]) node.str))
{
found = true;
if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
return node;
}
node = node.next;
}
return node;
@ -932,7 +927,7 @@ private:
return hash;
}
shared(ubyte[]) allocate(immutable size_t numBytes) pure nothrow @trusted
ubyte[] allocate(size_t numBytes) pure nothrow @trusted
in
{
assert (numBytes != 0);
@ -943,54 +938,44 @@ private:
}
body
{
import core.atomic;
import core.memory;
if (numBytes > (blockSize / 4))
return cast(shared) (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
shared(Block)* r = rootBlock;
while (true)
return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
Block* r = rootBlock;
size_t i = 0;
while (i <= 3 && r !is null)
{
while (r !is null)
immutable size_t available = r.bytes.length;
immutable size_t oldUsed = r.used;
immutable size_t newUsed = oldUsed + numBytes;
if (newUsed <= available)
{
while (true)
{
immutable size_t available = r.bytes.length;
immutable size_t oldUsed = atomicLoad(r.used);
immutable size_t newUsed = oldUsed + numBytes;
if (newUsed > available)
break;
if (cas(&r.used, oldUsed, newUsed))
return r.bytes[oldUsed .. newUsed];
}
r = r.next;
}
if (cas(&allocating, false, true))
{
shared(Block)* b = new shared Block(
cast(shared) (cast(ubyte*) malloc(blockSize))[0 .. blockSize],
numBytes,
r);
atomicStore(rootBlock, b);
atomicStore(allocating, false);
r = rootBlock;
return b.bytes[0 .. numBytes];
r.used = newUsed;
return r.bytes[oldUsed .. newUsed];
}
i++;
r = r.next;
}
assert (0);
Block* b = cast(Block*) malloc(Block.sizeof);
b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
b.used = numBytes;
b.next = rootBlock;
rootBlock = b;
return b.bytes[0 .. numBytes];
}
static shared struct Node
static struct Node
{
ubyte[] str;
uint hash;
shared(Node)* next;
Node* next;
}
static shared struct Block
static struct Block
{
ubyte[] bytes;
size_t used;
shared(Block)* next;
Block* next;
}
static enum blockSize = 1024 * 16;
@ -1062,10 +1047,12 @@ private:
0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
];
shared bool allocating;
shared(Node)*[] buckets;
shared(Block)* rootBlock;
// deprecated size_t[string] debugMap;
size_t _allocated;
Node*[] buckets;
Block* rootBlock;
}
private extern(C) void* calloc(size_t) nothrow pure;
private extern(C) void* malloc(size_t) nothrow pure;
private extern(C) void free(void*) nothrow pure;