Lots of optimization. Updated GDC portion of build script
This commit is contained in:
parent
a060dabde7
commit
4ec5af9093
6
build.sh
6
build.sh
|
@ -11,7 +11,7 @@ dmd\
|
||||||
stdx/d/*.d\
|
stdx/d/*.d\
|
||||||
analysis/*.d\
|
analysis/*.d\
|
||||||
-ofdscanner\
|
-ofdscanner\
|
||||||
-m64\
|
-m64 -g\
|
||||||
-O -release -noboundscheck -inline
|
-O -release -noboundscheck -inline
|
||||||
|
|
||||||
#gdc\
|
#gdc\
|
||||||
|
@ -23,9 +23,9 @@ dmd\
|
||||||
# astprinter.d\
|
# astprinter.d\
|
||||||
# formatter.d\
|
# formatter.d\
|
||||||
# outliner.d\
|
# outliner.d\
|
||||||
# style.d\
|
|
||||||
# stdx/*.d\
|
# stdx/*.d\
|
||||||
# stdx/d/*.d\
|
# stdx/d/*.d\
|
||||||
|
# analysis/*.d\
|
||||||
# -O3 -frelease -fno-bounds-check\
|
# -O3 -frelease -fno-bounds-check\
|
||||||
# -odscanner\
|
# -odscanner\
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ dmd\
|
||||||
# astprinter.d\
|
# astprinter.d\
|
||||||
# formatter.d\
|
# formatter.d\
|
||||||
# outliner.d\
|
# outliner.d\
|
||||||
# style.d\
|
|
||||||
# stdx/*.d\
|
# stdx/*.d\
|
||||||
# stdx/d/*.d\
|
# stdx/d/*.d\
|
||||||
|
# analysis/*.d\
|
||||||
# -O3 -release\
|
# -O3 -release\
|
||||||
# -oq -of=dscanner\
|
# -oq -of=dscanner\
|
||||||
|
|
3
main.d
3
main.d
|
@ -152,7 +152,7 @@ int main(string[] args)
|
||||||
ulong count;
|
ulong count;
|
||||||
foreach (f; expandArgs(args, recursive))
|
foreach (f; expandArgs(args, recursive))
|
||||||
{
|
{
|
||||||
import core.memory;
|
|
||||||
LexerConfig config;
|
LexerConfig config;
|
||||||
config.whitespaceBehavior = WhitespaceBehavior.skip;
|
config.whitespaceBehavior = WhitespaceBehavior.skip;
|
||||||
config.stringBehavior = StringBehavior.source;
|
config.stringBehavior = StringBehavior.source;
|
||||||
|
@ -162,7 +162,6 @@ int main(string[] args)
|
||||||
count += printTokenCount(stdout, f, tokens);
|
count += printTokenCount(stdout, f, tokens);
|
||||||
else
|
else
|
||||||
count += printLineCount(stdout, f, tokens);
|
count += printLineCount(stdout, f, tokens);
|
||||||
cache.printStats();
|
|
||||||
}
|
}
|
||||||
writefln("total:\t%d", count);
|
writefln("total:\t%d", count);
|
||||||
}
|
}
|
||||||
|
|
1
stats.d
1
stats.d
|
@ -32,7 +32,6 @@ pure nothrow bool isLineOfCode(IdType t)
|
||||||
|
|
||||||
ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
|
ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
|
||||||
{
|
{
|
||||||
|
|
||||||
ulong c;
|
ulong c;
|
||||||
foreach (ref t; tokens)
|
foreach (ref t; tokens)
|
||||||
{
|
{
|
||||||
|
|
|
@ -425,7 +425,6 @@ public struct DLexer
|
||||||
public void popFront() pure
|
public void popFront() pure
|
||||||
{
|
{
|
||||||
_popFront();
|
_popFront();
|
||||||
string comment = null;
|
|
||||||
switch (front.type)
|
switch (front.type)
|
||||||
{
|
{
|
||||||
case tok!"comment":
|
case tok!"comment":
|
||||||
|
@ -433,7 +432,11 @@ public struct DLexer
|
||||||
{
|
{
|
||||||
import std.string;
|
import std.string;
|
||||||
if (isDocComment(front.text))
|
if (isDocComment(front.text))
|
||||||
comment = comment == null ? front.text : format("%s\n%s", comment, front.text);
|
{
|
||||||
|
_front.comment = _front.comment == null
|
||||||
|
? front.text
|
||||||
|
: format("%s\n%s", _front.comment, front.text);
|
||||||
|
}
|
||||||
do _popFront(); while (front == tok!"comment");
|
do _popFront(); while (front == tok!"comment");
|
||||||
if (front == tok!"whitespace") goto case tok!"whitespace";
|
if (front == tok!"whitespace") goto case tok!"whitespace";
|
||||||
}
|
}
|
||||||
|
@ -448,7 +451,6 @@ public struct DLexer
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_front.comment = comment;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -715,17 +717,16 @@ public struct DLexer
|
||||||
lexExponent(type);
|
lexExponent(type);
|
||||||
break decimalLoop;
|
break decimalLoop;
|
||||||
case '.':
|
case '.':
|
||||||
if (foundDot || !range.canPeek(1) || range.peek(1)[1] == '.')
|
if (foundDot || !range.canPeek(1) || range.peekAt(1) == '.')
|
||||||
break decimalLoop;
|
break decimalLoop;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto lookahead = range.peek(1);
|
|
||||||
// The following bit of silliness tries to tell the
|
// The following bit of silliness tries to tell the
|
||||||
// difference between "int dot identifier" and
|
// difference between "int dot identifier" and
|
||||||
// "double identifier".
|
// "double identifier".
|
||||||
if (lookahead.length == 2)
|
if (range.canPeek(1))
|
||||||
{
|
{
|
||||||
switch (lookahead[1])
|
switch (range.peekAt(1))
|
||||||
{
|
{
|
||||||
case '0': .. case '9':
|
case '0': .. case '9':
|
||||||
goto doubleLiteral;
|
goto doubleLiteral;
|
||||||
|
@ -1362,7 +1363,6 @@ public struct DLexer
|
||||||
Token lexIdentifier() pure nothrow
|
Token lexIdentifier() pure nothrow
|
||||||
{
|
{
|
||||||
import std.stdio;
|
import std.stdio;
|
||||||
debug(1) try { writeln("lexIdentifier"); } catch (Exception e) {}
|
|
||||||
mixin (tokenStart);
|
mixin (tokenStart);
|
||||||
uint hash = 0;
|
uint hash = 0;
|
||||||
while (!range.empty && !isSeparating(0))
|
while (!range.empty && !isSeparating(0))
|
||||||
|
@ -1418,25 +1418,28 @@ public struct DLexer
|
||||||
{
|
{
|
||||||
if (range.front == '\n') return true;
|
if (range.front == '\n') return true;
|
||||||
if (range.front == '\r') return true;
|
if (range.front == '\r') return true;
|
||||||
auto lookahead = range.peek(3);
|
return (range.front & 0x80) && range.canPeek(2)
|
||||||
if (lookahead.length == 0) return false;
|
&& (range.peek(2) == "\u2028" || range.peek(2) == "\u2029");
|
||||||
if (lookahead == "\u2028" || lookahead == "\u2029")
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isSeparating(size_t offset) const pure nothrow @safe
|
bool isSeparating(size_t offset) pure nothrow @safe
|
||||||
{
|
{
|
||||||
auto r = range.save();
|
if (!range.canPeek(offset)) return false;
|
||||||
r.popFrontN(offset);
|
auto c = range.peekAt(offset);
|
||||||
auto c = r.front;
|
if (c >= 'A' && c <= 'Z') return false;
|
||||||
|
if (c >= 'a' && c <= 'z') return false;
|
||||||
if (c <= 0x2f) return true;
|
if (c <= 0x2f) return true;
|
||||||
if (c >= ':' && c <= '@') return true;
|
if (c >= ':' && c <= '@') return true;
|
||||||
if (c >= '[' && c <= '^') return true;
|
if (c >= '[' && c <= '^') return true;
|
||||||
if (c >= '{' && c <= '~') return true;
|
if (c >= '{' && c <= '~') return true;
|
||||||
if (c == '`') return true;
|
if (c == '`') return true;
|
||||||
if (c & 0x80 && (r.peek(3) == "\u2028"
|
if (c & 0x80)
|
||||||
|| range.peek(3) == "\u2029")) return true;
|
{
|
||||||
|
auto r = range;
|
||||||
|
range.popFrontN(offset);
|
||||||
|
return (r.canPeek(2) && (r.peek(2) == "\u2028"
|
||||||
|
|| r.peek(2) == "\u2029"));
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
171
stdx/lexer.d
171
stdx/lexer.d
|
@ -193,90 +193,130 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
alias staticTokens, alias dynamicTokens, alias pseudoTokens,
|
alias staticTokens, alias dynamicTokens, alias pseudoTokens,
|
||||||
alias pseudoTokenHandlers, alias possibleDefaultTokens)
|
alias pseudoTokenHandlers, alias possibleDefaultTokens)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static string generateMask(const ubyte[] arr)
|
||||||
|
{
|
||||||
|
import std.string;
|
||||||
|
ulong u;
|
||||||
|
for (size_t i = 0; i < arr.length && i < 8; i++)
|
||||||
|
{
|
||||||
|
u |= (cast(ulong) arr[i]) << (i * 8);
|
||||||
|
}
|
||||||
|
return format("0x%016x", u);
|
||||||
|
}
|
||||||
|
|
||||||
|
static string generateByteMask(size_t l)
|
||||||
|
{
|
||||||
|
import std.string;
|
||||||
|
return format("0x%016x", ulong.max >> ((8 - l) * 8));
|
||||||
|
}
|
||||||
|
|
||||||
static string generateCaseStatements(string[] tokens)
|
static string generateCaseStatements(string[] tokens)
|
||||||
{
|
{
|
||||||
import std.conv;
|
import std.conv;
|
||||||
import std.string;
|
import std.string;
|
||||||
|
|
||||||
static string generateMask(const ubyte[] arr)
|
|
||||||
{
|
|
||||||
ulong u;
|
|
||||||
for (size_t i = 0; i < arr.length && i < 8; i++)
|
|
||||||
{
|
|
||||||
u |= (cast(ulong) arr[i]) << (i * 8);
|
|
||||||
}
|
|
||||||
return format("0x%016x", u);
|
|
||||||
}
|
|
||||||
|
|
||||||
static string generateByteMask(size_t l)
|
|
||||||
{
|
|
||||||
return format("0x%016x", ulong.max >> ((8 - l) * 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
string code;
|
string code;
|
||||||
for (size_t i = 0; i < tokens.length; i++)
|
for (size_t i = 0; i < tokens.length; i++)
|
||||||
{
|
{
|
||||||
immutable mask = generateMask(cast (const ubyte[]) tokens[i]);
|
size_t j = i + 1;
|
||||||
if (tokens[i].length >= 8)
|
size_t o = i;
|
||||||
code ~= "if (frontBytes == " ~ mask ~ ")\n";
|
while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++;
|
||||||
else
|
code ~= format("case 0x%02x:\n", cast(ubyte) tokens[i][0]);
|
||||||
code ~= "if ((frontBytes & " ~ generateByteMask(tokens[i].length) ~ ") == " ~ mask ~ ")\n";
|
code ~= printCase(tokens[i .. j]);
|
||||||
code ~= "{\n";
|
i = j - 1;
|
||||||
if (staticTokens.countUntil(tokens[i]) >= 0)
|
}
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
static string printCase(string[] tokens)
|
||||||
|
{
|
||||||
|
string[] t = tokens;
|
||||||
|
string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
|
||||||
|
import std.conv;
|
||||||
|
|
||||||
|
if (tokens.length == 1 && tokens[0].length == 1)
|
||||||
|
{
|
||||||
|
if (staticTokens.countUntil(tokens[0]) >= 0)
|
||||||
{
|
{
|
||||||
if (tokens[i].length <= 8)
|
return " range.popFront();\n"
|
||||||
|
~ " return Token(tok!\"" ~ escape(tokens[0]) ~ "\", null, line, column, index);\n";
|
||||||
|
}
|
||||||
|
else if (pseudoTokens.countUntil(tokens[0]) >= 0)
|
||||||
|
{
|
||||||
|
return " return "
|
||||||
|
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
|
||||||
|
~ "();\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
string code;
|
||||||
|
|
||||||
|
foreach (i, token; sortedTokens)
|
||||||
|
{
|
||||||
|
immutable mask = generateMask(cast (const ubyte[]) token);
|
||||||
|
if (token.length >= 8)
|
||||||
|
code ~= " if (frontBytes == " ~ mask ~ ")\n";
|
||||||
|
else
|
||||||
|
code ~= " if ((frontBytes & " ~ generateByteMask(token.length) ~ ") == " ~ mask ~ ")\n";
|
||||||
|
code ~= " {\n";
|
||||||
|
if (staticTokens.countUntil(token) >= 0)
|
||||||
|
{
|
||||||
|
if (token.length <= 8)
|
||||||
{
|
{
|
||||||
code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
|
code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||||
code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
|
code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
code ~= " assert (false); // " ~ escape(tokens[i]) ~ "\n";
|
code ~= " pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (pseudoTokens.countUntil(tokens[i]) >= 0)
|
else if (pseudoTokens.countUntil(token) >= 0)
|
||||||
{
|
{
|
||||||
if (tokens[i].length < 8)
|
if (token.length < 8)
|
||||||
{
|
{
|
||||||
code ~= " return "
|
code ~= " return "
|
||||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
|
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
|
||||||
~ "();\n";
|
~ "();\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
code ~= " if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~"\")\n";
|
code ~= " if (range.peek(" ~ text(token.length) ~ ") == \"" ~ escape(token) ~"\")\n";
|
||||||
code ~= " return "
|
code ~= " return "
|
||||||
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
|
~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
|
||||||
~ "();\n";
|
~ "();\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// possible default
|
// possible default
|
||||||
if (tokens[i].length < 8)
|
if (token.length < 8)
|
||||||
{
|
{
|
||||||
code ~= " if (isSeparating(" ~ text(tokens[i].length) ~ "))\n";
|
code ~= " if (isSeparating(" ~ text(token.length) ~ "))\n";
|
||||||
code ~= " {\n";
|
code ~= " {\n";
|
||||||
code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
|
code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||||
code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
|
code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||||
code ~= " }\n";
|
code ~= " }\n";
|
||||||
code ~= " else\n";
|
code ~= " else\n";
|
||||||
code ~= " goto defaultHandler;\n";
|
code ~= " goto default;\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
code ~= " if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~"\" && isSeparating(" ~ text(tokens[i].length) ~ "))\n";
|
code ~= " if (range.peek(" ~ text(token.length) ~ ") == \"" ~ escape(token) ~"\" && isSeparating(" ~ text(token.length) ~ "))\n";
|
||||||
code ~= " {\n";
|
code ~= " {\n";
|
||||||
code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
|
code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
|
||||||
code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
|
code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
|
||||||
code ~= " }\n";
|
code ~= " }\n";
|
||||||
code ~= " else\n";
|
code ~= " else\n";
|
||||||
code ~= " goto defaultHandler;\n";
|
code ~= " goto default;\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
code ~= "}\n";
|
code ~= " }\n";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
code ~= " else\n";
|
||||||
|
code ~= " goto default;\n";
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,15 +365,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum tokenSearch = generateCaseStatements(stupidToArray(sort!"a.length > b.length"(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));
|
enum tokenSearch = generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));
|
||||||
|
|
||||||
static ulong getFront(const ubyte[] arr) pure nothrow @trusted
|
static ulong getFront(const ubyte[] arr) pure nothrow @trusted
|
||||||
{
|
{
|
||||||
import std.stdio;
|
import std.stdio;
|
||||||
immutable importantBits = *(cast (ulong*) arr.ptr);
|
immutable importantBits = *(cast (ulong*) arr.ptr);
|
||||||
immutable filler = ulong.max >> ((8 - arr.length) * 8);
|
immutable filler = ulong.max >> ((8 - arr.length) * 8);
|
||||||
|
|
||||||
debug(1) try { writefln("0x%016x", importantBits & filler); } catch (Exception e) {}
|
|
||||||
return importantBits & filler;
|
return importantBits & filler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,10 +383,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
|
||||||
immutable size_t column = range.column;
|
immutable size_t column = range.column;
|
||||||
immutable size_t line = range.line;
|
immutable size_t line = range.line;
|
||||||
immutable ulong frontBytes = getFront(range.peek(7));
|
immutable ulong frontBytes = getFront(range.peek(7));
|
||||||
|
switch (frontBytes & 0x00000000_000000ff)
|
||||||
|
{
|
||||||
mixin(tokenSearch);
|
mixin(tokenSearch);
|
||||||
pragma(msg, tokenSearch);
|
/+pragma(msg, tokenSearch);+/
|
||||||
defaultHandler:
|
default:
|
||||||
return defaultTokenFunction();
|
return defaultTokenFunction();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LexerRange range;
|
LexerRange range;
|
||||||
|
@ -398,16 +439,16 @@ struct LexerRange
|
||||||
: bytes[index .. index + p + 1];
|
: bytes[index .. index + p + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ubyte peekAt(size_t offset) const nothrow pure @safe
|
||||||
|
{
|
||||||
|
return bytes[index + offset];
|
||||||
|
}
|
||||||
|
|
||||||
bool canPeek(size_t p) const nothrow pure @safe
|
bool canPeek(size_t p) const nothrow pure @safe
|
||||||
{
|
{
|
||||||
return index + p < bytes.length;
|
return index + p < bytes.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
LexerRange save() const nothrow pure @safe
|
|
||||||
{
|
|
||||||
return LexerRange(bytes, index, column, line);
|
|
||||||
}
|
|
||||||
|
|
||||||
void popFront() pure nothrow @safe
|
void popFront() pure nothrow @safe
|
||||||
{
|
{
|
||||||
index++;
|
index++;
|
||||||
|
@ -501,7 +542,7 @@ public:
|
||||||
}
|
}
|
||||||
body
|
body
|
||||||
{
|
{
|
||||||
memoryRequested += bytes.length;
|
debug memoryRequested += bytes.length;
|
||||||
const(Item)* found = find(bytes, hash);
|
const(Item)* found = find(bytes, hash);
|
||||||
if (found is null)
|
if (found is null)
|
||||||
return intern(bytes, hash);
|
return intern(bytes, hash);
|
||||||
|
@ -528,7 +569,7 @@ public:
|
||||||
return items[index].str;
|
return items[index].str;
|
||||||
}
|
}
|
||||||
|
|
||||||
void printStats()
|
debug void printStats()
|
||||||
{
|
{
|
||||||
import std.stdio;
|
import std.stdio;
|
||||||
writeln("Load Factor: ", cast(float) items.length / cast(float) buckets.length);
|
writeln("Load Factor: ", cast(float) items.length / cast(float) buckets.length);
|
||||||
|
@ -550,7 +591,7 @@ private:
|
||||||
{
|
{
|
||||||
immutable size_t newBucketCount = items.length * 2;
|
immutable size_t newBucketCount = items.length * 2;
|
||||||
buckets = new Item*[newBucketCount];
|
buckets = new Item*[newBucketCount];
|
||||||
rehashCount++;
|
debug rehashCount++;
|
||||||
foreach (item; items)
|
foreach (item; items)
|
||||||
{
|
{
|
||||||
immutable size_t newIndex = item.hash % newBucketCount;
|
immutable size_t newIndex = item.hash % newBucketCount;
|
||||||
|
@ -707,6 +748,6 @@ private:
|
||||||
Item*[] items;
|
Item*[] items;
|
||||||
Item*[] buckets;
|
Item*[] buckets;
|
||||||
Block[] blocks;
|
Block[] blocks;
|
||||||
size_t memoryRequested;
|
debug size_t memoryRequested;
|
||||||
uint rehashCount;
|
debug uint rehashCount;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue