Attempt to fix some issues with string interning
This commit is contained in:
parent 7a2d5f7e73
commit 19dc7c707f
ctags.d | 2
@@ -20,7 +20,7 @@ void printCtags(File output, string[] fileNames)
 {
 	string[] tags;
 	LexerConfig config;
-	shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
+	StringCache* cache = new StringCache(StringCache.defaultBucketCount);
 	foreach (fileName; fileNames)
 	{
 		File f = File(fileName);
main.d | 2
@@ -102,7 +102,7 @@ int run(string[] args)
 		return 1;
 	}
 
-	shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
+	StringCache* cache = new StringCache(StringCache.defaultBucketCount);
 
 	if (tokenDump || highlight)
 	{
@@ -415,7 +415,7 @@ public struct DLexer
 	mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
 		keywords, pseudoTokenHandlers);
 
-	this(ubyte[] range, const LexerConfig config, shared(StringCache)* cache)
+	this(ubyte[] range, const LexerConfig config, StringCache* cache)
 	{
 		this.range = LexerRange(range);
 		this.config = config;
@@ -1543,24 +1543,24 @@ public struct DLexer
 	}
 
 	Message[] messages;
-	shared(StringCache)* cache;
+	StringCache* cache;
 	LexerConfig config;
 }
 
 public auto byToken(ubyte[] range)
 {
 	LexerConfig config;
-	shared(StringCache)* cache = new shared StringCache(StringCache.defaultBucketCount);
+	StringCache* cache = new StringCache(StringCache.defaultBucketCount);
 	return DLexer(range, config, cache);
 }
 
-public auto byToken(ubyte[] range, shared(StringCache)* cache)
+public auto byToken(ubyte[] range, StringCache* cache)
 {
 	LexerConfig config;
 	return DLexer(range, config, cache);
 }
 
-public auto byToken(ubyte[] range, const LexerConfig config, shared(StringCache)* cache)
+public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
 {
 	return DLexer(range, config, cache);
 }
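With StringCache no longer shared, each caller now owns a plain, thread-local cache and passes a pointer to it down into the lexer, exactly as ctags.d and main.d do above. A minimal sketch of the updated calling convention (the import path is an assumption, not taken from this commit; adjust it to wherever DLexer/byToken live in this tree):

    // Illustrative only -- module name is assumed.
    import std.d.lexer : byToken, LexerConfig, StringCache;

    void lexSource(ubyte[] source)
    {
        LexerConfig config;
        // One cache per thread; it is no longer a shared(StringCache)*.
        StringCache* cache = new StringCache(StringCache.defaultBucketCount);
        foreach (token; byToken(source, config, cache))
        {
            // consume tokens; all token text is interned through `cache`
        }
    }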
std/lexer.d | 141
@@ -773,34 +773,33 @@ struct LexerRange
 	size_t line;
 }
 
-shared struct StringCache
+struct StringCache
 {
-	import core.sync.mutex;
 public:
 
 	@disable this();
 
 	/**
-	 * Params: bucketCount = the initial number of buckets.
+	 * Params: bucketCount = the initial number of buckets. Must be a
+	 * power of two
 	 */
 	this(size_t bucketCount)
 	{
-		buckets = cast(shared) new Node*[bucketCount];
-		allocating = false;
+		buckets = (cast(Node**) calloc((void*).sizeof * bucketCount))[0 .. bucketCount];
 	}
 
 	~this()
 	{
-		import core.memory;
-		shared(Block)* current = rootBlock;
+		Block* current = rootBlock;
 		while (current !is null)
 		{
-			shared(Block)* prev = current;
+			Block* prev = current;
 			current = current.next;
 			free(cast(void*) prev.bytes.ptr);
+			free(cast(void*) prev);
 		}
 		rootBlock = null;
-		buckets = [];
+		buckets = null;
 	}
 
 	/**
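The constructor now gets its bucket array from calloc instead of the GC, and the documentation gains the power-of-two requirement. That requirement comes from the lookup code further down, which replaces hash % buckets.length with hash & (buckets.length - 1); the two are only equivalent when the length is a power of two, and the masked form avoids an integer division on the hot path. A tiny self-contained check of that identity (illustrative only, not part of the commit):

    void main()
    {
        enum n = 2048; // StringCache.defaultBucketCount
        foreach (uint hash; [0u, 1u, 2047u, 2048u, 0xDEADBEEFu])
            assert((hash & (n - 1)) == (hash % n)); // holds because n is a power of two
    }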
@@ -845,6 +844,13 @@ public:
 	body
 	{
 		return _intern(str, hash);
+		// string s = _intern(str, hash);
+		// size_t* ptr = s in debugMap;
+		// if (ptr is null)
+		//	debugMap[s] = cast(size_t) s.ptr;
+		// else
+		//	assert (*ptr == cast(size_t) s.ptr);
+		// return s;
 	}
 
 	/**
@@ -864,7 +870,10 @@ public:
 	 */
 	static enum defaultBucketCount = 2048;
 
-	size_t allocated;
+	size_t allocated() pure nothrow @safe @property
+	{
+		return _allocated;
+	}
 
 private:
 
@@ -872,44 +881,30 @@ private:
 	{
 		if (bytes is null || bytes.length == 0)
 			return "";
-		import core.atomic;
-		shared ubyte[] mem;
-		shared(Node*)* oldBucketRoot = &buckets[hash % buckets.length];
-		while (true)
-		{
-			bool found;
-			shared(Node)* s = find(bytes, hash, found);
-			shared(Node)* n = s is null ? null : s.next;
-			if (found)
-				return cast(string) s.str;
-			if (mem.length == 0)
-			{
-				atomicOp!"+="(allocated, bytes.length);
-				mem = allocate(bytes.length);
-				mem[] = bytes[];
-			}
-			shared(Node)* node = new shared Node(mem, hash, null);
-			if (s is null && cas(oldBucketRoot, *oldBucketRoot, node))
-				break;
-			node.next = s.next;
-			if (cas(&s.next, n, node))
-				break;
-		}
+		immutable size_t index = hash & (buckets.length - 1);
+		Node* s = find(bytes, hash);
+		if (s !is null)
+			return cast(string) s.str;
+		_allocated += bytes.length;
+		ubyte[] mem = allocate(bytes.length);
+		mem[] = bytes[];
+		Node* node = cast(Node*) malloc(Node.sizeof);
+		node.str = mem;
+		node.hash = hash;
+		node.next = buckets[index];
+		buckets[index] = node;
 		return cast(string) mem;
 	}
 
-	shared(Node)* find(const(ubyte)[] bytes, uint hash, ref bool found) pure nothrow @trusted
+	Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
 	{
 		import std.algorithm;
-		immutable size_t index = hash % buckets.length;
-		shared(Node)* node = buckets[index];
+		immutable size_t index = hash & (buckets.length - 1);
+		Node* node = buckets[index];
 		while (node !is null)
 		{
-			if (node.hash >= hash && bytes.equal(cast(ubyte[]) node.str))
-			{
-				found = true;
+			if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
 				return node;
-			}
 			node = node.next;
 		}
 		return node;
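Besides dropping the lock-free CAS loop in favour of straight-line single-threaded code, this hunk also corrects the comparison in find(): node.hash >= hash could match unrelated entries, node.hash == hash is the intended test. The underlying structure stays ordinary separate chaining with prepend-on-miss; a stand-alone sketch of the same pattern (illustrative only, GC-allocated rather than malloc-backed like the real code):

    struct TinyInterner
    {
        static struct Node { string str; uint hash; Node* next; }
        Node*[] buckets; // length must be a power of two

        string intern(string s, uint hash)
        {
            immutable size_t index = hash & (buckets.length - 1);
            for (Node* n = buckets[index]; n !is null; n = n.next)
                if (n.hash == hash && n.str == s)
                    return n.str;                  // hit: reuse the stored copy
            buckets[index] = new Node(s.idup, hash, buckets[index]); // miss: copy and prepend
            return buckets[index].str;
        }
    }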
@@ -932,7 +927,7 @@ private:
 		return hash;
 	}
 
-	shared(ubyte[]) allocate(immutable size_t numBytes) pure nothrow @trusted
+	ubyte[] allocate(size_t numBytes) pure nothrow @trusted
 	in
 	{
 		assert (numBytes != 0);
@@ -943,54 +938,44 @@ private:
 	}
 	body
 	{
-		import core.atomic;
-		import core.memory;
 		if (numBytes > (blockSize / 4))
-			return cast(shared) (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
-		shared(Block)* r = rootBlock;
-		while (true)
+			return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
+		Block* r = rootBlock;
+		size_t i = 0;
+		while (i <= 3 && r !is null)
 		{
-			while (r !is null)
+			immutable size_t available = r.bytes.length;
+			immutable size_t oldUsed = r.used;
+			immutable size_t newUsed = oldUsed + numBytes;
+			if (newUsed <= available)
 			{
-				while (true)
-				{
-					immutable size_t available = r.bytes.length;
-					immutable size_t oldUsed = atomicLoad(r.used);
-					immutable size_t newUsed = oldUsed + numBytes;
-					if (newUsed > available)
-						break;
-					if (cas(&r.used, oldUsed, newUsed))
-						return r.bytes[oldUsed .. newUsed];
-				}
-				r = r.next;
-			}
-			if (cas(&allocating, false, true))
-			{
-				shared(Block)* b = new shared Block(
-					cast(shared) (cast(ubyte*) malloc(blockSize))[0 .. blockSize],
-					numBytes,
-					r);
-				atomicStore(rootBlock, b);
-				atomicStore(allocating, false);
-				r = rootBlock;
-				return b.bytes[0 .. numBytes];
+				r.used = newUsed;
+				return r.bytes[oldUsed .. newUsed];
 			}
+			i++;
+			r = r.next;
 		}
-		assert (0);
+		Block* b = cast(Block*) malloc(Block.sizeof);
+		b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
+		b.used = numBytes;
+		b.next = rootBlock;
+		rootBlock = b;
+		return b.bytes[0 .. numBytes];
 	}
 
-	static shared struct Node
+	static struct Node
 	{
 		ubyte[] str;
 		uint hash;
-		shared(Node)* next;
+		Node* next;
 	}
 
-	static shared struct Block
+	static struct Block
 	{
 		ubyte[] bytes;
 		size_t used;
-		shared(Block)* next;
+		Block* next;
 	}
 
 	static enum blockSize = 1024 * 16;
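The allocator likewise loses its CAS loop and shared qualifiers and becomes a plain bump allocator over a singly linked list of 16 KiB blocks: requests larger than a quarter block go straight to malloc, otherwise the few most recent blocks are scanned for space and, failing that, a fresh block is pushed onto the front of the list. The same strategy in isolation (illustrative only; GC-allocated here for brevity, unlike the malloc-backed code above):

    struct BumpBlocks
    {
        static struct Block { ubyte[] bytes; size_t used; Block* next; }
        enum blockSize = 1024 * 16;
        Block* root;

        ubyte[] allocate(size_t n)
        {
            if (n > blockSize / 4)
                return new ubyte[n];               // oversized: bypass the block list
            size_t i = 0;
            for (Block* b = root; b !is null && i <= 3; b = b.next, i++)
            {
                if (b.used + n <= b.bytes.length)  // bump within an existing block
                {
                    auto slice = b.bytes[b.used .. b.used + n];
                    b.used += n;
                    return slice;
                }
            }
            root = new Block(new ubyte[blockSize], n, root); // start a new block
            return root.bytes[0 .. n];
        }
    }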
@@ -1062,10 +1047,12 @@ private:
 		0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
 	];
 
-	shared bool allocating;
-	shared(Node)*[] buckets;
-	shared(Block)* rootBlock;
+	// deprecated size_t[string] debugMap;
+	size_t _allocated;
+	Node*[] buckets;
+	Block* rootBlock;
 }
 
+private extern(C) void* calloc(size_t) nothrow pure;
 private extern(C) void* malloc(size_t) nothrow pure;
 private extern(C) void free(void*) nothrow pure;