Lots of optimization. Updated GDC portion of build script

Hackerpilot 2014-01-21 23:26:23 -08:00
parent a060dabde7
commit 4ec5af9093
5 changed files with 132 additions and 90 deletions


@@ -11,7 +11,7 @@ dmd\
 stdx/d/*.d\
 analysis/*.d\
 -ofdscanner\
--m64\
+-m64 -g\
 -O -release -noboundscheck -inline
 #gdc\
@@ -23,9 +23,9 @@ dmd\
 # astprinter.d\
 # formatter.d\
 # outliner.d\
-# style.d\
 # stdx/*.d\
 # stdx/d/*.d\
+# analysis/*.d\
 # -O3 -frelease -fno-bounds-check\
 # -odscanner\
@@ -38,8 +38,8 @@ dmd\
 # astprinter.d\
 # formatter.d\
 # outliner.d\
-# style.d\
 # stdx/*.d\
 # stdx/d/*.d\
+# analysis/*.d\
 # -O3 -release\
 # -oq -of=dscanner\

main.d

@@ -152,7 +152,7 @@ int main(string[] args)
     ulong count;
     foreach (f; expandArgs(args, recursive))
     {
-        import core.memory;
         LexerConfig config;
         config.whitespaceBehavior = WhitespaceBehavior.skip;
         config.stringBehavior = StringBehavior.source;
@@ -162,7 +162,6 @@ int main(string[] args)
             count += printTokenCount(stdout, f, tokens);
         else
             count += printLineCount(stdout, f, tokens);
-        cache.printStats();
     }
     writefln("total:\t%d", count);
 }


@@ -32,7 +32,6 @@ pure nothrow bool isLineOfCode(IdType t)
 ulong printTokenCount(Tokens)(File output, string fileName, ref Tokens tokens)
 {
     ulong c;
     foreach (ref t; tokens)
     {


@@ -425,7 +425,6 @@ public struct DLexer
     public void popFront() pure
     {
         _popFront();
-        string comment = null;
         switch (front.type)
        {
        case tok!"comment":
@@ -433,7 +432,11 @@ public struct DLexer
            {
                import std.string;
                if (isDocComment(front.text))
-                    comment = comment == null ? front.text : format("%s\n%s", comment, front.text);
+                {
+                    _front.comment = _front.comment == null
+                        ? front.text
+                        : format("%s\n%s", _front.comment, front.text);
+                }
                do _popFront(); while (front == tok!"comment");
                if (front == tok!"whitespace") goto case tok!"whitespace";
            }
@@ -448,7 +451,6 @@ public struct DLexer
        default:
            break;
        }
-        _front.comment = comment;
    }
@@ -715,17 +717,16 @@ public struct DLexer
            lexExponent(type);
            break decimalLoop;
        case '.':
-            if (foundDot || !range.canPeek(1) || range.peek(1)[1] == '.')
+            if (foundDot || !range.canPeek(1) || range.peekAt(1) == '.')
                break decimalLoop;
            else
            {
-                auto lookahead = range.peek(1);
                // The following bit of silliness tries to tell the
                // difference between "int dot identifier" and
                // "double identifier".
-                if (lookahead.length == 2)
+                if (range.canPeek(1))
                {
-                    switch (lookahead[1])
+                    switch (range.peekAt(1))
                    {
                    case '0': .. case '9':
                        goto doubleLiteral;
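The peekAt(1) rewrite above also drops the intermediate peek(1) slice. The two-byte lookahead itself works like this (a simplified, hypothetical classifier; the real switch goes on to handle further cases such as identifiers starting in later alternatives not shown in this hunk):

// Hypothetical illustration of the lookahead after an integer's digits;
// rest[0] is the '.' the lexer just stopped on.
enum DotKind { intThenDot, rangeOp, doubleLiteral }

DotKind classifyDot(const(ubyte)[] rest)
{
    if (rest.length < 2) return DotKind.intThenDot; // "1." at EOF: '.' is its own token
    if (rest[1] == '.') return DotKind.rangeOp;     // "1..2": the '.' starts ".."
    if (rest[1] >= '0' && rest[1] <= '9')
        return DotKind.doubleLiteral;               // "1.0": keep lexing a double
    return DotKind.intThenDot;                      // "1.foo": int, dot, identifier
}

unittest
{
    assert(classifyDot(cast(const(ubyte)[]) "..2") == DotKind.rangeOp);
    assert(classifyDot(cast(const(ubyte)[]) ".5") == DotKind.doubleLiteral);
    assert(classifyDot(cast(const(ubyte)[]) ".foo") == DotKind.intThenDot);
}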
@@ -1362,7 +1363,6 @@ public struct DLexer
    Token lexIdentifier() pure nothrow
    {
        import std.stdio;
-        debug(1) try { writeln("lexIdentifier"); } catch (Exception e) {}
        mixin (tokenStart);
        uint hash = 0;
        while (!range.empty && !isSeparating(0))
@@ -1418,25 +1418,28 @@
    {
        if (range.front == '\n') return true;
        if (range.front == '\r') return true;
-        auto lookahead = range.peek(3);
-        if (lookahead.length == 0) return false;
-        if (lookahead == "\u2028" || lookahead == "\u2029")
-            return true;
-        return false;
+        return (range.front & 0x80) && range.canPeek(2)
+            && (range.peek(2) == "\u2028" || range.peek(2) == "\u2029");
    }

-    bool isSeparating(size_t offset) const pure nothrow @safe
+    bool isSeparating(size_t offset) pure nothrow @safe
    {
-        auto r = range.save();
-        r.popFrontN(offset);
-        auto c = r.front;
+        if (!range.canPeek(offset)) return false;
+        auto c = range.peekAt(offset);
+        if (c >= 'A' && c <= 'Z') return false;
+        if (c >= 'a' && c <= 'z') return false;
        if (c <= 0x2f) return true;
        if (c >= ':' && c <= '@') return true;
        if (c >= '[' && c <= '^') return true;
        if (c >= '{' && c <= '~') return true;
        if (c == '`') return true;
-        if (c & 0x80 && (r.peek(3) == "\u2028"
-            || range.peek(3) == "\u2029")) return true;
+        if (c & 0x80)
+        {
+            auto r = range;
+            r.popFrontN(offset);
+            return (r.canPeek(2) && (r.peek(2) == "\u2028"
+                || r.peek(2) == "\u2029"));
+        }
        return false;
    }
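The new isNewline rejects plain ASCII with a single bit test before doing any multi-byte comparison; peek(2) is inclusive on this range, so it yields exactly the three bytes that U+2028/U+2029 occupy in UTF-8. A minimal sketch of the same check over a raw byte slice (hypothetical helper, not part of the lexer):

// Hypothetical stand-alone version of the same test; bytes[i .. i + 3]
// stands in for range.peek(2), which returns an inclusive 3-byte slice.
bool isUnicodeLineSeparator(const(ubyte)[] bytes, size_t i)
{
    // ASCII fast path: no ASCII byte has the high bit set, while U+2028 and
    // U+2029 encode as the 3-byte sequences E2 80 A8 and E2 80 A9.
    if (!(bytes[i] & 0x80)) return false;
    if (i + 3 > bytes.length) return false;
    immutable ubyte[3] ls = [0xE2, 0x80, 0xA8];
    immutable ubyte[3] ps = [0xE2, 0x80, 0xA9];
    return bytes[i .. i + 3] == ls[] || bytes[i .. i + 3] == ps[];
}

unittest
{
    assert("\u2028".length == 3); // LINE SEPARATOR is three UTF-8 code units
    assert(isUnicodeLineSeparator(cast(const(ubyte)[]) "a\u2028b", 1));
    assert(!isUnicodeLineSeparator(cast(const(ubyte)[]) "abc", 0));
}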


@@ -193,90 +193,130 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
     alias staticTokens, alias dynamicTokens, alias pseudoTokens,
     alias pseudoTokenHandlers, alias possibleDefaultTokens)
 {
+    static string generateMask(const ubyte[] arr)
+    {
+        import std.string;
+        ulong u;
+        for (size_t i = 0; i < arr.length && i < 8; i++)
+        {
+            u |= (cast(ulong) arr[i]) << (i * 8);
+        }
+        return format("0x%016x", u);
+    }
+
+    static string generateByteMask(size_t l)
+    {
+        import std.string;
+        return format("0x%016x", ulong.max >> ((8 - l) * 8));
+    }
+
     static string generateCaseStatements(string[] tokens)
     {
         import std.conv;
         import std.string;
-        static string generateMask(const ubyte[] arr)
-        {
-            ulong u;
-            for (size_t i = 0; i < arr.length && i < 8; i++)
-            {
-                u |= (cast(ulong) arr[i]) << (i * 8);
-            }
-            return format("0x%016x", u);
-        }
-        static string generateByteMask(size_t l)
-        {
-            return format("0x%016x", ulong.max >> ((8 - l) * 8));
-        }
         string code;
         for (size_t i = 0; i < tokens.length; i++)
         {
-            immutable mask = generateMask(cast (const ubyte[]) tokens[i]);
-            if (tokens[i].length >= 8)
-                code ~= "if (frontBytes == " ~ mask ~ ")\n";
-            else
-                code ~= "if ((frontBytes & " ~ generateByteMask(tokens[i].length) ~ ") == " ~ mask ~ ")\n";
-            code ~= "{\n";
-            if (staticTokens.countUntil(tokens[i]) >= 0)
+            size_t j = i + 1;
+            size_t o = i;
+            while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++;
+            code ~= format("case 0x%02x:\n", cast(ubyte) tokens[i][0]);
+            code ~= printCase(tokens[i .. j]);
+            i = j - 1;
+        }
+        return code;
+    }
+
+    static string printCase(string[] tokens)
+    {
+        string[] t = tokens;
+        string[] sortedTokens = stupidToArray(sort!"a.length > b.length"(t));
+        import std.conv;
+        if (tokens.length == 1 && tokens[0].length == 1)
+        {
+            if (staticTokens.countUntil(tokens[0]) >= 0)
             {
-                if (tokens[i].length <= 8)
+                return " range.popFront();\n"
+                    ~ " return Token(tok!\"" ~ escape(tokens[0]) ~ "\", null, line, column, index);\n";
+            }
+            else if (pseudoTokens.countUntil(tokens[0]) >= 0)
+            {
+                return " return "
+                    ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[0]) + 1]
+                    ~ "();\n";
+            }
+        }
+        string code;
+        foreach (i, token; sortedTokens)
+        {
+            immutable mask = generateMask(cast (const ubyte[]) token);
+            if (token.length >= 8)
+                code ~= " if (frontBytes == " ~ mask ~ ")\n";
+            else
+                code ~= " if ((frontBytes & " ~ generateByteMask(token.length) ~ ") == " ~ mask ~ ")\n";
+            code ~= " {\n";
+            if (staticTokens.countUntil(token) >= 0)
+            {
+                if (token.length <= 8)
                 {
-                    code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-                    code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
+                    code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
+                    code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                 }
                 else
                 {
-                    code ~= " assert (false); // " ~ escape(tokens[i]) ~ "\n";
+                    code ~= " pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n";
                 }
             }
-            else if (pseudoTokens.countUntil(tokens[i]) >= 0)
+            else if (pseudoTokens.countUntil(token) >= 0)
             {
-                if (tokens[i].length < 8)
+                if (token.length < 8)
                 {
                     code ~= " return "
-                        ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
+                        ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
                         ~ "();\n";
                 }
                 else
                 {
-                    code ~= " if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~ "\")\n";
+                    code ~= " if (range.peek(" ~ text(token.length) ~ ") == \"" ~ escape(token) ~ "\")\n";
                     code ~= " return "
-                        ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(tokens[i]) + 1]
+                        ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1]
                         ~ "();\n";
                 }
             }
             else
             {
                 // possible default
-                if (tokens[i].length < 8)
+                if (token.length < 8)
                 {
-                    code ~= " if (isSeparating(" ~ text(tokens[i].length) ~ "))\n";
+                    code ~= " if (isSeparating(" ~ text(token.length) ~ "))\n";
                     code ~= " {\n";
-                    code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-                    code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
+                    code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
+                    code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                     code ~= " }\n";
                     code ~= " else\n";
-                    code ~= " goto defaultHandler;\n";
+                    code ~= " goto default;\n";
                 }
                 else
                 {
-                    code ~= " if (range.peek(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~ "\" && isSeparating(" ~ text(tokens[i].length) ~ "))\n";
+                    code ~= " if (range.peek(" ~ text(token.length) ~ ") == \"" ~ escape(token) ~ "\" && isSeparating(" ~ text(token.length) ~ "))\n";
                     code ~= " {\n";
-                    code ~= " range.popFrontN(" ~ text(tokens[i].length) ~ ");\n";
-                    code ~= " return Token(tok!\"" ~ escape(tokens[i]) ~ "\", null, line, column, index);\n";
+                    code ~= " range.popFrontN(" ~ text(token.length) ~ ");\n";
+                    code ~= " return Token(tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                     code ~= " }\n";
                     code ~= " else\n";
-                    code ~= " goto defaultHandler;\n";
+                    code ~= " goto default;\n";
                 }
             }
-            code ~= "}\n";
+            code ~= " }\n";
         }
+        code ~= " else\n";
+        code ~= " goto default;\n";
         return code;
     }
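generateCaseStatements now expects its input sorted (the tokenSearch enum below switches to a plain sort), walks runs of tokens that share a first byte, and emits one case label per run; printCase then re-sorts each run longest-first so a short token such as "!" can never match before "!<>=". A hypothetical miniature of the grouping step:

// Hypothetical miniature of the grouping loop above: split a sorted token
// list into runs sharing a first character, one `case` per run.
string[][] groupByFirstChar(string[] tokens)
{
    string[][] groups;
    for (size_t i = 0; i < tokens.length; i++)
    {
        size_t j = i + 1;
        while (j < tokens.length && tokens[i][0] == tokens[j][0]) j++;
        groups ~= tokens[i .. j];
        i = j - 1;
    }
    return groups;
}

unittest
{
    auto toks = ["!", "!<>=", "!=", "%", "%="]; // already sorted
    assert(groupByFirstChar(toks) == [["!", "!<>=", "!="], ["%", "%="]]);
}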
@@ -325,15 +365,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
        return retVal;
    }

-    enum tokenSearch = generateCaseStatements(stupidToArray(sort!"a.length > b.length"(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));
+    enum tokenSearch = generateCaseStatements(stupidToArray(sort(staticTokens ~ pseudoTokens ~ possibleDefaultTokens)));

    static ulong getFront(const ubyte[] arr) pure nothrow @trusted
    {
        import std.stdio;
        immutable importantBits = *(cast (ulong*) arr.ptr);
        immutable filler = ulong.max >> ((8 - arr.length) * 8);
-        debug(1) try { writefln("0x%016x", importantBits & filler); } catch (Exception e) {}
        return importantBits & filler;
    }
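The masks those CTFE helpers emit line up with getFront: it packs up to eight bytes little-endian into a ulong and zeroes everything past arr.length, so the generated comparisons can test frontBytes against the same packing of each token string. A small worked example with hypothetical helper names mirroring generateMask/generateByteMask:

// Hypothetical mirrors of the CTFE helpers: pack a token's bytes
// little-endian (as generateMask does) and build the length mask
// (as generateByteMask does).
ulong packMask(const ubyte[] arr)
{
    ulong u;
    for (size_t i = 0; i < arr.length && i < 8; i++)
        u |= (cast(ulong) arr[i]) << (i * 8);
    return u;
}

ulong byteMask(size_t l)
{
    return ulong.max >> ((8 - l) * 8);
}

unittest
{
    // "==" is the bytes 0x3D 0x3D, so the packed constant is 0x3d3d and
    // the mask keeps only the low two bytes of frontBytes.
    assert(packMask(cast(const ubyte[]) "==") == 0x3d3d);
    assert(byteMask(2) == 0xffff);

    // Matching "==" at the front of "==5" the way the generated code does:
    immutable ulong frontBytes = packMask(cast(const ubyte[]) "==5");
    assert((frontBytes & byteMask(2)) == packMask(cast(const ubyte[]) "=="));
}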
@@ -345,10 +383,13 @@ mixin template Lexer(IDType, Token, alias defaultTokenFunction,
        immutable size_t column = range.column;
        immutable size_t line = range.line;
        immutable ulong frontBytes = getFront(range.peek(7));
+        switch (frontBytes & 0x00000000_000000ff)
+        {
        mixin(tokenSearch);
-        pragma(msg, tokenSearch);
-    defaultHandler:
+        /+pragma(msg, tokenSearch);+/
+        default:
            return defaultTokenFunction();
+        }
    }

    LexerRange range;
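Wrapping the mixed-in tokenSearch in a switch over the low byte replaces what used to be one long if chain ending at a defaultHandler label with a jump keyed on the first character, so the masked comparisons only run within a single case. The emitted source now has roughly this shape (an illustrative sketch, not a verbatim dump of tokenSearch):

switch (frontBytes & 0x00000000_000000ff)
{
case 0x2e: // every token beginning with '.'
    if ((frontBytes & 0x0000000000ffffff) == 0x00000000002e2e2e) // "..."
    {
        range.popFrontN(3);
        return Token(tok!"...", null, line, column, index);
    }
    if ((frontBytes & 0x000000000000ffff) == 0x0000000000002e2e) // ".."
    {
        range.popFrontN(2);
        return Token(tok!"..", null, line, column, index);
    }
    // ...longest first, so "." is only tried after ".." and "..."...
    else
        goto default;
// ...one case per leading byte...
default:
    return defaultTokenFunction();
}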
@@ -398,16 +439,16 @@
            : bytes[index .. index + p + 1];
    }

+    ubyte peekAt(size_t offset) const nothrow pure @safe
+    {
+        return bytes[index + offset];
+    }
+
    bool canPeek(size_t p) const nothrow pure @safe
    {
        return index + p < bytes.length;
    }

-    LexerRange save() const nothrow pure @safe
-    {
-        return LexerRange(bytes, index, column, line);
-    }
-
    void popFront() pure nothrow @safe
    {
        index++;
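The new peekAt is what lets isSeparating above stop copying the range: the removed save()/popFrontN(offset) pair cost a struct copy plus offset individual pops, while canPeek(p) is one bounds check and peekAt(p) a plain array index. Note that peek(p) is inclusive, returning p + 1 bytes. A tiny sketch (hypothetical miniature, not the real LexerRange):

struct MiniRange
{
    const(ubyte)[] bytes;
    size_t index;

    // peek(p) is inclusive: it yields the p + 1 bytes starting at the cursor.
    const(ubyte)[] peek(size_t p) const { return bytes[index .. index + p + 1]; }

    // Bounds check once, then read a single byte -- no slice, no range copy.
    bool canPeek(size_t p) const { return index + p < bytes.length; }
    ubyte peekAt(size_t p) const { return bytes[index + p]; }
}

unittest
{
    auto r = MiniRange(cast(const(ubyte)[]) "1.5", 0);
    assert(r.canPeek(2));
    assert(r.peekAt(1) == '.');                      // one byte, directly
    assert(r.peek(1) == cast(const(ubyte)[]) "1.");  // inclusive 2-byte slice
}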
@@ -501,7 +542,7 @@ public:
    }
    body
    {
-        memoryRequested += bytes.length;
+        debug memoryRequested += bytes.length;
        const(Item)* found = find(bytes, hash);
        if (found is null)
            return intern(bytes, hash);
@@ -528,7 +569,7 @@ public:
        return items[index].str;
    }

-    void printStats()
+    debug void printStats()
    {
        import std.stdio;
        writeln("Load Factor: ", cast(float) items.length / cast(float) buckets.length);
@@ -550,7 +591,7 @@ private:
    {
        immutable size_t newBucketCount = items.length * 2;
        buckets = new Item*[newBucketCount];
-        rehashCount++;
+        debug rehashCount++;
        foreach (item; items)
        {
            immutable size_t newIndex = item.hash % newBucketCount;
@@ -707,6 +748,6 @@ private:
    Item*[] items;
    Item*[] buckets;
    Block[] blocks;
-    size_t memoryRequested;
-    uint rehashCount;
+    debug size_t memoryRequested;
+    debug uint rehashCount;
 }
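The string cache's counters are now declared and updated under debug, so they only exist when compiled with -debug; release builds drop the fields, the bookkeeping writes, and printStats entirely, which is part of the optimization this commit is after. The pattern in miniature:

struct Counter
{
    void add(size_t n)
    {
        debug bytesSeen += n; // compiled in only with -debug
    }

    debug void printStats()   // the whole function disappears otherwise
    {
        import std.stdio : writeln;
        writeln("bytes seen: ", bytesSeen);
    }

    debug size_t bytesSeen;   // field exists only in debug builds
}

void main()
{
    Counter c;
    c.add(42);
    debug c.printStats();     // callers must guard calls the same way
}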