std.uni: optimize simpleCaseTable

Cuts the size of simpleCaseTable entries in half (8 -> 4 bytes), reducing the table size from 24 KB to 12 KB.
Still, only 22 out of 32 bits per entry are actually used, so it could be optimized further.
Using plain integers saves 10 ms of semantic2 compile time.
This commit is contained in:
Dennis Korpel 2024-01-06 15:05:35 +01:00 committed by The Dlang Bot
parent 733d335268
commit 135e9ad193
3 changed files with 436 additions and 790 deletions

File diff suppressed because it is too large Load diff

View file

@ -8016,19 +8016,19 @@ if (isInputRange!S1 && isSomeChar!(ElementEncodingType!S1)
if (idx2 != EMPTY_CASE_TRIE) if (idx2 != EMPTY_CASE_TRIE)
{// both cased chars {// both cased chars
// adjust idx --> start of bucket // adjust idx --> start of bucket
idx = idx - sTable[idx].n; idx = idx - sTable(idx).n;
idx2 = idx2 - sTable[idx2].n; idx2 = idx2 - sTable(idx2).n;
if (idx == idx2)// one bucket, equivalent chars if (idx == idx2)// one bucket, equivalent chars
continue; continue;
else// not the same bucket else// not the same bucket
diff = sTable[idx].ch - sTable[idx2].ch; diff = sTable(idx).ch - sTable(idx2).ch;
} }
else else
diff = sTable[idx - sTable[idx].n].ch - rhs; diff = sTable(idx - sTable(idx).n).ch - rhs;
} }
else if (idx2 != EMPTY_CASE_TRIE) else if (idx2 != EMPTY_CASE_TRIE)
{ {
diff = lhs - sTable[idx2 - sTable[idx2].n].ch; diff = lhs - sTable(idx2 - sTable(idx2).n).ch;
} }
// one of chars is not cased at all // one of chars is not cased at all
return diff; return diff;
@ -8331,7 +8331,7 @@ package(std) auto simpleCaseFoldings(dchar ch) @safe
{ {
return c; return c;
} }
auto ch = sTable[idx].ch; auto ch = sTable(idx).ch;
return ch; return ch;
} }
@ -8367,7 +8367,7 @@ package(std) auto simpleCaseFoldings(dchar ch) @safe
immutable idx = simpleCaseTrie[ch]; immutable idx = simpleCaseTrie[ch];
if (idx == EMPTY_CASE_TRIE) if (idx == EMPTY_CASE_TRIE)
return Range(ch); return Range(ch);
auto entry = sTable[idx]; auto entry = sTable(idx);
immutable start = idx - entry.n; immutable start = idx - entry.n;
return Range(start, entry.size); return Range(start, entry.size);
} }

View file

@ -117,26 +117,37 @@ struct FullCaseEntry
} }
} }
enum mixedCCEntry = ` /// 8 byte easy SimpleCaseEntry, will be compressed to SCE which bit packs values to 4 bytes
struct SimpleCaseEntry struct SimpleCaseEntry
{ {
uint ch; uint ch;
ubyte n, bucket;// n - number in bucket ubyte n; // number in bucket
ubyte size;
bool isLower;
bool isUpper;
}
pure nothrow @nogc: enum mixedCCEntry = `
/// Simple Case Entry, wrapper around uint to extract bit fields from simpleCaseTable()
struct SCE
{
uint x;
@property ubyte size() const nothrow @nogc pure @safe:
this(uint x)
{ {
return bucket & 0x3F; this.x = x;
} }
@property auto isLower() const
this(uint ch, ubyte n, ubyte size)
{ {
return bucket & 0x40; this.x = ch | n << 20 | size << 24;
}
@property auto isUpper() const
{
return bucket & 0x80;
} }
int ch() const { return this.x & 0x1FFFF; }
int n() const { return (this.x >> 20) & 0xF; }
int size() const { return this.x >> 24; }
} }
/// Bit backed FullCaseEntry /// Bit backed FullCaseEntry
@ -432,10 +443,13 @@ void loadCaseFolding(string f)
sort(entry[0 .. size]); sort(entry[0 .. size]);
foreach (i, value; entry[0 .. size]) foreach (i, value; entry[0 .. size])
{ {
auto withFlags = cast(ubyte) size | (value in lowerCaseSet ? 0x40 : 0) simpleTable ~= SimpleCaseEntry(
| (value in upperCaseSet ? 0x80 : 0); value,
simpleTable ~= SimpleCaseEntry(value, cast(ubyte) i, cast(ubyte) i,
cast(ubyte) withFlags); cast(ubyte) size,
cast(bool) (value in lowerCaseSet),
cast(bool) (value in upperCaseSet)
);
} }
} }
@ -882,17 +896,22 @@ void writeCaseFolding(File sink)
{ {
write(mixedCCEntry); write(mixedCCEntry);
writeln("@property immutable(SimpleCaseEntry[]) simpleCaseTable()"); writeln("SCE simpleCaseTable(size_t i)");
writeln("{"); writeln("{");
writeln("alias SCE = SimpleCaseEntry;"); writef("static immutable uint[%d] t = [", simpleTable.length);
writeln("static immutable SCE[] t = [");
foreach (i, v; simpleTable) foreach (i, v; simpleTable)
{ {
writef("SCE(0x%04x, %s, 0x%0x),", v.ch, v.n, v.bucket); if (i % 8 == 0) writeln();
if (i % 4 == 0) writeln(); writef("0x%08X,", SCE(v.ch, v.n, v.size).x);
} }
// Inspect max integer size, so efficient bit packing can be found:
stderr.writefln("max n: %X", simpleTable.maxElement!(x => x.n).n); // n: 2-bit
stderr.writefln("max ch: %X", simpleTable.maxElement!(x => x.ch).ch); // ch: 17-bit
stderr.writefln("max size: %X", simpleTable.maxElement!(x => x.size).size); // size: 3-bit
writeln("];"); writeln("];");
writeln("return t;"); writeln("return SCE(t[i]);");
writeln("}"); writeln("}");
writeln("@property FCE fullCaseTable(size_t index) nothrow @nogc @safe pure"); writeln("@property FCE fullCaseTable(size_t index) nothrow @nogc @safe pure");
writeln("{"); writeln("{");