This commit is contained in:
Hackerpilot 2013-02-22 13:28:40 -08:00
commit d637e533bc
3 changed files with 2204 additions and 2184 deletions

14
main.d
View File

@ -149,7 +149,10 @@ int main(string[] args)
config.fileName = arg; config.fileName = arg;
uint count; uint count;
auto f = File(arg); auto f = File(arg);
ubyte[] buffer = uninitializedArray!(ubyte[])(f.size); import core.stdc.stdlib;
ubyte[] buffer = (cast(ubyte*)malloc(f.size))[0..f.size];
scope(exit) free(buffer.ptr);
//uninitializedArray!(ubyte[])(f.size);
foreach (t; byToken(f.rawRead(buffer), config)) foreach (t; byToken(f.rawRead(buffer), config))
{ {
if (tokenCount) if (tokenCount)
@ -183,8 +186,6 @@ void printHelp(string programName)
` `
Usage: %s options Usage: %s options
options: options:
--help | -h --help | -h
Prints this help message Prints this help message
@ -235,10 +236,7 @@ options:
of a filename. of a filename.
--recursive | -R | -r directory --recursive | -R | -r directory
When used with --ctags or --highlight, dscanner will produce ctags/html When used with --ctags, dscanner will produce ctags output for all .d
output for all .d and .di files contained within directory and its and .di files contained within directory and its sub-directories.`,
sub-directories. When used with --imports, dscanner will output all
modules imported by the given file as well as any modules publically
imported by any imported modules.`,
programName); programName);
} }

File diff suppressed because it is too large Load Diff

View File

@ -2010,15 +2010,18 @@ private:
return idx; return idx;
} }
} }
auto chunk = buffer[0..idx]; //TODO: avoid looking up as UTF string, use raw bytes
auto entity = cast(string)chunk in characterEntities; string chunk = cast(string)buffer[0..idx];
if (entity is null) auto names = assumeSorted(map!"a.name"(characterEntities));
auto place = names.lowerBound(chunk).length;
if (place == names.length || names[place] != chunk)
{ {
errorMessage("Invalid character entity \"&%s;\"" errorMessage("Invalid character entity \"&%s;\""
.format(cast(string) chunk)); .format(cast(string) chunk));
return 1; return 1;
} }
dest.put(cast(ubyte[]) (*entity)[0..$]); auto entity = characterEntities[place].value;
dest.put(cast(ubyte[]) entity);
return entity.length; return entity.length;
default: default:
errorMessage("Invalid escape sequence"); errorMessage("Invalid escape sequence");
@ -3048,40 +3051,33 @@ struct StringCache
if(isRandomAccessRange!R if(isRandomAccessRange!R
&& is(Unqual!(ElementType!R) : const(ubyte))) && is(Unqual!(ElementType!R) : const(ubyte)))
{ {
size_t bucket;
hash_t h; uint h = hash(range);
string* val = find(range, bucket, h); uint bucket = h % mapSize;
if (val !is null) Slot *s = &index[bucket];
//1st slot not yet initialized?
if(s.value.ptr == null)
{
*s = Slot(putIntoCache(range), null, h);
return s.value;
}
Slot* insSlot = s;
for(;;)
{ {
return *val; if(s.hash == h && s.value.equal(range))
} return s.value;
else insSlot = s;
{ s = s.next;
auto s = putIntoCache(range); if(s == null) break;
index[bucket] ~= s;
return s;
} }
string str = putIntoCache(range);
insertIntoSlot(insSlot, str, h);
return str;
} }
private: private:
import core.stdc.string; static uint hash(R)(R data)
string* find(R)(R data, out size_t bucket, out hash_t h)
{
h = hash(data);
bucket = h % mapSize;
foreach (i; 0 .. index[bucket].length)
{
if (equal(index[bucket][i], data))
{
return &index[bucket][i];
}
}
return null;
}
static hash_t hash(R)(R data)
{ {
uint hash = 0; uint hash = 0;
foreach (b; data) foreach (b; data)
@ -3093,28 +3089,54 @@ private:
} }
enum mapSize = 2048; enum mapSize = 2048;
string[][mapSize] index;
struct Slot
{
string value;
Slot* next;
uint hash;
};
void insertIntoSlot(Slot* tgt, string val, uint hash)
{
auto slice = allocateInCache(Slot.sizeof);
auto newSlot = cast(Slot*)slice.ptr;
*newSlot = Slot(val, null, hash);
tgt.next = newSlot;
}
Slot[mapSize] index;
// leave some slack for allocators/GC meta-data // leave some slack for allocators/GC meta-data
enum chunkSize = 16*1024 - size_t.sizeof*8; enum chunkSize = 16*1024 - size_t.sizeof*8;
ubyte*[] chunkS; ubyte*[] chunkS;
size_t next = chunkSize; size_t next = chunkSize;
string putIntoCache(R)(R data) ubyte[] allocateInCache(size_t size)
{ {
import core.memory; import core.memory;
if(next + size > chunkSize)
if(next + data.length > chunkSize)
{ {
// avoid huge strings // avoid huge allocations
if(data.length > chunkSize/4) if(size> chunkSize/4)
return (cast(char[])data).idup; {
ubyte* p = cast(ubyte*)GC.malloc(size,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
return p[0..size];
}
chunkS ~= cast(ubyte*)GC.malloc(chunkSize, chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR); GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
next = 0; next = 0;
} }
auto slice = chunkS[$-1][next..next+data.length]; auto slice = chunkS[$-1][next..next+size];
next += size;
return slice;
}
string putIntoCache(R)(R data)
{
auto slice = allocateInCache(data.length);
slice[] = data[]; slice[] = data[];
next += data.length;
return cast(string)slice; return cast(string)slice;
} }