From ae91019ab495f8d52e7d3bdaee4f82558fcc928c Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Sun, 18 May 2014 01:59:02 -0700 Subject: [PATCH] Use string interning for a very nice speed boost --- actypes.d | 97 +++++++++++++++++++++++++++++++++------ autocomplete.d | 16 +++---- build.bat | 2 +- build.sh | 5 +- containers | 2 +- conversion/astconverter.d | 5 +- conversion/first.d | 49 +++++++++----------- conversion/second.d | 15 ------ conversion/third.d | 24 +++++----- dscanner | 2 +- modulecache.d | 16 +++---- server.d | 8 ++-- string_interning.d | 44 ++++++++++++++++++ 13 files changed, 184 insertions(+), 101 deletions(-) create mode 100644 string_interning.d diff --git a/actypes.d b/actypes.d index c52a150..3145fa4 100644 --- a/actypes.d +++ b/actypes.d @@ -21,16 +21,17 @@ module actypes; import std.algorithm; import std.array; import std.container; -import std.stdio; +//import std.stdio; import std.typecons; import std.allocator; -import containers.karytree; +import containers.ttree; import containers.unrolledlist; import containers.slist; import std.d.lexer; import messages; +import string_interning; /** * Any special information about a variable declaration symbol. @@ -62,7 +63,7 @@ public: */ this(string name) { - this.name = name; + this.name = name is null ? name : internString(name); } /** @@ -72,7 +73,7 @@ public: */ this(string name, CompletionKind kind) { - this.name = name; + this.name = name is null ? name : internString(name); this.kind = kind; } @@ -84,16 +85,18 @@ public: */ this(string name, CompletionKind kind, ACSymbol* type) { - this.name = name; + this.name = name is null ? name : internString(name); this.kind = kind; this.type = type; } int opCmp(ref const ACSymbol other) const { - if (name < other.name) + // Compare the pointers because the strings have been interned. + // Identical strings MUST have the same address + if (name.ptr < other.name.ptr) return -1; - if (name > other.name) + if (name.ptr > other.name.ptr) return 1; return 0; } @@ -111,7 +114,7 @@ public: * Symbols that compose this symbol, such as enum members, class variables, * methods, etc. */ - KAryTree!(ACSymbol*, true, "a < b", false) parts; + TTree!(ACSymbol*, true, "a < b", false) parts; /** * Symbol's name @@ -275,7 +278,7 @@ struct Scope size_t endLocation; /// Symbols contained in this scope - KAryTree!(ACSymbol*, true, "a < b", false) symbols; + TTree!(ACSymbol*, true, "a < b", false) symbols; } /** @@ -297,33 +300,94 @@ struct ImportInformation /** * Symbols for the built in types */ -KAryTree!(ACSymbol*, true) builtinSymbols; +TTree!(ACSymbol*, true, "a < b", false) builtinSymbols; /** * Array properties */ -KAryTree!(ACSymbol*, true) arraySymbols; +TTree!(ACSymbol*, true, "a < b", false) arraySymbols; /** * Associative array properties */ -KAryTree!(ACSymbol*, true) assocArraySymbols; +TTree!(ACSymbol*, true, "a < b", false) assocArraySymbols; /** * Enum, union, class, and interface properties */ -KAryTree!(ACSymbol*, true) aggregateSymbols; +TTree!(ACSymbol*, true, "a < b", false) aggregateSymbols; /** * Class properties */ -KAryTree!(ACSymbol*, true) classSymbols; +TTree!(ACSymbol*, true, "a < b", false) classSymbols; + +private immutable(string[24]) builtinTypeNames; + +string getBuiltinTypeName(IdType id) +{ + switch (id) + { + case tok!"int": return builtinTypeNames[0]; + case tok!"uint": return builtinTypeNames[1]; + case tok!"double": return builtinTypeNames[2]; + case tok!"idouble": return builtinTypeNames[3]; + case tok!"float": return builtinTypeNames[4]; + case tok!"ifloat": return builtinTypeNames[5]; + case tok!"short": return builtinTypeNames[6]; + case tok!"ushort": return builtinTypeNames[7]; + case tok!"long": return builtinTypeNames[8]; + case tok!"ulong": return builtinTypeNames[9]; + case tok!"char": return builtinTypeNames[10]; + case tok!"wchar": return builtinTypeNames[11]; + case tok!"dchar": return builtinTypeNames[12]; + case tok!"bool": return builtinTypeNames[13]; + case tok!"void": return builtinTypeNames[14]; + case tok!"cent": return builtinTypeNames[15]; + case tok!"ucent": return builtinTypeNames[16]; + case tok!"real": return builtinTypeNames[17]; + case tok!"ireal": return builtinTypeNames[18]; + case tok!"byte": return builtinTypeNames[19]; + case tok!"ubyte": return builtinTypeNames[20]; + case tok!"cdouble": return builtinTypeNames[21]; + case tok!"cfloat": return builtinTypeNames[22]; + case tok!"creal": return builtinTypeNames[23]; + default: assert (false); + } +} + /** * Initializes builtin types and the various properties of builtin types */ static this() { + builtinTypeNames[0] = internString("int"); + builtinTypeNames[1] = internString("uint"); + builtinTypeNames[2] = internString("double"); + builtinTypeNames[3] = internString("idouble"); + builtinTypeNames[4] = internString("float"); + builtinTypeNames[5] = internString("ifloat"); + builtinTypeNames[6] = internString("short"); + builtinTypeNames[7] = internString("ushort"); + builtinTypeNames[8] = internString("long"); + builtinTypeNames[9] = internString("ulong"); + builtinTypeNames[10] = internString("char"); + builtinTypeNames[11] = internString("wchar"); + builtinTypeNames[12] = internString("dchar"); + builtinTypeNames[13] = internString("bool"); + builtinTypeNames[14] = internString("void"); + builtinTypeNames[15] = internString("cent"); + builtinTypeNames[16] = internString("ucent"); + builtinTypeNames[17] = internString("real"); + builtinTypeNames[18] = internString("ireal"); + builtinTypeNames[19] = internString("byte"); + builtinTypeNames[20] = internString("ubyte"); + builtinTypeNames[21] = internString("cdouble"); + builtinTypeNames[22] = internString("cfloat"); + builtinTypeNames[23] = internString("creal"); + + auto bool_ = allocate!ACSymbol(Mallocator.it, "bool", CompletionKind.keyword); auto int_ = allocate!ACSymbol(Mallocator.it, "int", CompletionKind.keyword); auto long_ = allocate!ACSymbol(Mallocator.it, "long", CompletionKind.keyword); @@ -492,5 +556,10 @@ static this() builtinSymbols.insert(real_); builtinSymbols.insert(ucent_); builtinSymbols.insert(void_); + +// writeln(">>Builtin symbols"); +// foreach (symbol; builtinSymbols[]) +// writeln(symbol.name, " ", symbol.name.ptr); +// writeln("<> %s %016X", stringRepresentation, stringRepresentation.ptr); ACSymbol s = ACSymbol(stringRepresentation); + assert(s.name.ptr == stringRepresentation.ptr); +// writefln(">> %s %016X", s.name, s.name.ptr); return builtinSymbols.equalRange(&s).front(); } - - shared(StringCache)* stringCache; } diff --git a/dscanner b/dscanner index 8b4b2b3..19dc7c7 160000 --- a/dscanner +++ b/dscanner @@ -1 +1 @@ -Subproject commit 8b4b2b342f4e3c8dbead90353c6f7aa4800c2ce4 +Subproject commit 19dc7c707f857b104144e909d77543b2db1521b8 diff --git a/modulecache.d b/modulecache.d index 3ca45b7..05e4310 100644 --- a/modulecache.d +++ b/modulecache.d @@ -32,7 +32,7 @@ import std.path; import actypes; import semantic; import memory.allocators; -import containers.karytree; +import containers.ttree; import containers.hashset; import containers.unrolledlist; import conversion.astconverter; @@ -74,7 +74,6 @@ bool existanceCheck(A)(A path) static this() { - ModuleCache.stringCache = new shared StringCache(StringCache.defaultBucketCount); ModuleCache.symbolAllocator = new CAllocatorImpl!(BlockAllocator!(1024 * 16)); } @@ -115,6 +114,7 @@ struct ModuleCache */ static ACSymbol*[] getSymbolsInModule(string location) { + import string_interning; assert (location !is null); if (!needsReparsing(location)) { @@ -126,7 +126,7 @@ struct ModuleCache return []; } - string cachedLocation = stringCache.intern(location); + string cachedLocation = internString(location); Log.info("Getting symbols for ", cachedLocation); @@ -145,7 +145,7 @@ struct ModuleCache f.rawRead(source); LexerConfig config; config.fileName = cachedLocation; - shared parseStringCache = shared StringCache(StringCache.defaultBucketCount); + auto parseStringCache = StringCache(StringCache.defaultBucketCount); auto semanticAllocator = scoped!(CAllocatorImpl!(BlockAllocator!(1024 * 64))); DynamicArray!(Token, false) tokens; auto tokenRange = byToken( @@ -158,8 +158,8 @@ struct ModuleCache Module m = parseModuleSimple(tokens[], cachedLocation, semanticAllocator); assert (symbolAllocator); - auto first = scoped!FirstPass(m, cachedLocation, stringCache, - symbolAllocator, semanticAllocator); + auto first = scoped!FirstPass(m, cachedLocation, symbolAllocator, + semanticAllocator); first.run(); SecondPass second = SecondPass(first); @@ -234,8 +234,6 @@ struct ModuleCache return importPaths[]; } - static shared(StringCache)* stringCache; - static uint symbolsAllocated; private: @@ -264,7 +262,7 @@ private: } // Mapping of file paths to their cached symbols. - static KAryTree!(CacheEntry*) cache; + static TTree!(CacheEntry*) cache; static HashSet!string recursionGuard; diff --git a/server.d b/server.d index 7d6082b..d0da309 100644 --- a/server.d +++ b/server.d @@ -100,11 +100,9 @@ int main(string[] args) sw.stop(); Log.info("Startup completed in ", sw.peek().to!("msecs", float), " milliseconds"); - float internBytes = cast(float) ModuleCache.stringCache.allocated / (1024 * 1024); - Log.info("String interning took up ", internBytes, " megabytes"); - float symbolMegs = (cast(float) (ACSymbol.sizeof * ModuleCache.symbolsAllocated)) / (1024f * 1024f); - Log.info(ModuleCache.symbolsAllocated, " symbols allocated, taking up ", - symbolMegs, " megabytes"); +// float symbolMegs = (cast(float) (ACSymbol.sizeof * ModuleCache.symbolsAllocated)) / (1024f * 1024f); +// Log.info(ModuleCache.symbolsAllocated, " symbols allocated, taking up ", +// symbolMegs, " megabytes"); // No relative paths version (Posix) chdir("/"); diff --git a/string_interning.d b/string_interning.d new file mode 100644 index 0000000..244e5e1 --- /dev/null +++ b/string_interning.d @@ -0,0 +1,44 @@ +/** + * This file is part of DCD, a development tool for the D programming language. + * Copyright (C) 2014 Brian Schott + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +module string_interning; + +import std.lexer; + +string internString(string s) +{ +// import std.stdio; +// import std.string; +// size_t* p = s in dupCheck; +// auto r = stringCache.intern(s); + return stringCache.intern(s); +// if (p !is null) +// assert (*p == cast(size_t) r.ptr, format("%s, %016x, %016x", s, *p, r.ptr)); +// else +// dupCheck[s] = cast(size_t) r.ptr; +// stderr.writefln("%s\t%016x", r, r.ptr); +// return r; +} + +static this() +{ + stringCache = StringCache(StringCache.defaultBucketCount); +} + +//private size_t[string] dupCheck; +private StringCache stringCache = void;