Merged

2013-02-22 13:28:40 -08:00 · 2013-02-22 13:28:40 -08:00 · d637e533bc
parent 12369feea7 15062f7c89
commit d637e533bc
3 changed files with 2204 additions and 2184 deletions
--- a/main.d
+++ b/main.d
@ -149,7 +149,10 @@ int main(string[] args)
 			config.fileName = arg;
 			uint count;
            auto f = File(arg);
-            ubyte[] buffer = uninitializedArray!(ubyte[])(f.size);
+            import core.stdc.stdlib;
            ubyte[] buffer = (cast(ubyte*)malloc(f.size))[0..f.size];
            scope(exit) free(buffer.ptr);
            //uninitializedArray!(ubyte[])(f.size);
 			foreach (t; byToken(f.rawRead(buffer), config))
            {
                if (tokenCount)
@ -183,8 +186,6 @@ void printHelp(string programName)
 `
    Usage: %s options
 options:
    --help | -h
        Prints this help message
@ -235,10 +236,7 @@ options:
        of a filename.
    --recursive | -R | -r directory
-        When used with --ctags or --highlight, dscanner will produce ctags/html
+        When used with --ctags, dscanner will produce ctags output for all .d
-        output for all .d and .di files contained within directory and its
+        and .di files contained within directory and its sub-directories.`,
        sub-directories. When used with --imports, dscanner will output all
        modules imported by the given file as well as any modules publically
        imported by any imported modules.`,
        programName);
 }
--- a/std/d/entities.d
+++ b/std/d/entities.d
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -2010,15 +2010,18 @@ private:
 						return idx;
 				}
 			}
-			auto chunk = buffer[0..idx];
+            //TODO: avoid looking up as UTF string, use raw bytes
-			auto entity = cast(string)chunk in characterEntities;
+			string chunk = cast(string)buffer[0..idx];
-			if (entity is null)
+            auto names = assumeSorted(map!"a.name"(characterEntities));
            auto place = names.lowerBound(chunk).length;
 			if (place == names.length || names[place] != chunk)
 			{
 				errorMessage("Invalid character entity \"&%s;\""
 					.format(cast(string) chunk));
 				return 1;
 			}
-			dest.put(cast(ubyte[]) (*entity)[0..$]);
+            auto entity = characterEntities[place].value;
 			dest.put(cast(ubyte[]) entity);
 			return entity.length;
 		default:
 			errorMessage("Invalid escape sequence");
@ -3048,40 +3051,33 @@ struct StringCache
 		if(isRandomAccessRange!R
 			&& is(Unqual!(ElementType!R) : const(ubyte)))
 	{
-		size_t bucket;
+		
-		hash_t h;
+		uint h = hash(range);
-		string* val = find(range, bucket, h);
+		uint bucket = h % mapSize;
-		if (val !is null)
+        Slot *s = &index[bucket];
        //1st slot not yet initialized?
        if(s.value.ptr == null) 
        {
            *s = Slot(putIntoCache(range), null, h);
            return s.value;
        }
        Slot* insSlot = s;
 		for(;;)
 		{
-			return *val;
+			if(s.hash == h && s.value.equal(range))
-		}
+                return s.value;
-		else
+            insSlot = s;
-		{
+            s = s.next;
-			auto s = putIntoCache(range);
+            if(s == null) break;
 			index[bucket] ~= s;
 			return s;
 		}		
        string str = putIntoCache(range);
        insertIntoSlot(insSlot, str, h);
        return str;
 	}
 private:
-	import core.stdc.string;
+	static uint hash(R)(R data)
 	string* find(R)(R data, out size_t bucket, out hash_t h)
 	{
 		h = hash(data);
 		bucket = h % mapSize;
 		foreach (i; 0 .. index[bucket].length)
 		{
 			if (equal(index[bucket][i], data))
 			{
 				return &index[bucket][i];
 			}
 		}
 		return null;
 	}
 	static hash_t hash(R)(R data)
 	{
 		uint hash = 0;
 		foreach (b; data)
@ -3093,28 +3089,54 @@ private:
 	}
 	enum mapSize = 2048;
-	string[][mapSize] index;
+    
    /// One entry of the bucket table; entries that share a bucket are chained
    /// through `next`.
    struct Slot
    {
        /// The cached string. A null `value.ptr` marks a table slot that has
        /// not been filled yet.
        string value;
        /// Next entry in the same bucket, or null at the end of the chain.
        Slot* next;
        /// Full (pre-modulo) hash of `value`; lookups compare it before the
        /// contents so most mismatches are rejected without a string compare.
        uint hash;
    };
    /**
     * Appends a new entry holding `val` and `hash` directly after `tgt`.
     *
     * Storage for the entry is taken from allocateInCache. Whatever `tgt.next`
     * pointed to before is overwritten, so `tgt` is expected to be the last
     * entry of its chain.
     *
     * NOTE(review): the visible allocateInCache requests its memory with
     * GC.BlkAttr.NO_SCAN, so the pointers stored in the new entry are
     * presumably not traced by the GC - confirm their targets stay reachable
     * some other way.
     */
    void insertIntoSlot(Slot* tgt, string val, uint hash)
    {
        Slot* node = cast(Slot*) allocateInCache(Slot.sizeof).ptr;
        node.value = val;
        node.next = null;   // the new entry terminates the chain
        node.hash = hash;
        tgt.next = node;
    }
    Slot[mapSize] index;
 	// leave some slack for allocators/GC meta-data
 	enum chunkSize = 16*1024 - size_t.sizeof*8;
 	ubyte*[] chunkS;
 	size_t next = chunkSize;
-	string putIntoCache(R)(R data)
+    ubyte[] allocateInCache(size_t size)
-	{
+    {
-		import core.memory;
+        import core.memory;
-
+        if(next + size > chunkSize)
 		if(next + data.length > chunkSize)
 		{
-			// avoid huge strings
+			// avoid huge allocations
-			if(data.length > chunkSize/4)
+			if(size> chunkSize/4)
-				return (cast(char[])data).idup;
+            {
                ubyte* p = cast(ubyte*)GC.malloc(size,
                    GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
                return p[0..size];
            }
 			chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
 				GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
 			next = 0;
 		}
-		auto slice = chunkS[$-1][next..next+data.length];
+        auto slice = chunkS[$-1][next..next+size];
 		next += size;
        return slice;
    }
 	/**
 	 * Copies `data` into storage obtained from allocateInCache and returns the
 	 * copy as a string.
 	 *
 	 * allocateInCache already advances `next` past the bytes it hands out
 	 * (and leaves it alone when it serves a large request from a separate
 	 * block), so the offset must not be advanced a second time here; doing so
 	 * would skip `data.length` unused bytes after every cached string.
 	 */
 	string putIntoCache(R)(R data)
 	{
 		auto slice = allocateInCache(data.length);
 		slice[] = data[];
 		return cast(string)slice;
 	}