mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 06:30:28 +03:00
purify std.uni constructs, blocked by std.algortihm.sort
This commit is contained in:
parent
cbd9bd3b79
commit
c264bd6a51
2 changed files with 99 additions and 80 deletions
File diff suppressed because one or more lines are too long
167
std/uni.d
167
std/uni.d
|
@ -68,10 +68,10 @@
|
|||
$(LI
|
||||
Another useful building block for Unicode-aware parsers is avoiding wasteful
|
||||
UTF decoding yet performing character classification of encoded $(CODEPOINTS).
|
||||
$(LREF utfMatcher) provides an improvement over the usual workflow
|
||||
of decode-classify-process, combining decode and classify steps.
|
||||
By extracting nessary bits directly from encoded
|
||||
$(S_LINK Code unit, code units) matchers achieve
|
||||
$(LREF utfMatcher) provides an improvement over the usual workflow
|
||||
of decode-classify-process, combining decode and classify steps.
|
||||
By extracting nessary bits directly from encoded
|
||||
$(S_LINK Code unit, code units) matchers achieve
|
||||
significant performance improvement. See $(LREF MatcherConcept) for
|
||||
common interface of UTF matchers.
|
||||
)
|
||||
|
@ -1390,7 +1390,6 @@ private struct SliceOverIndexed(T)
|
|||
{
|
||||
enum assignableIndex = is(typeof((){ T.init[0] = Item.init; }));
|
||||
enum assignableSlice = is(typeof((){ T.init[0..0] = Item.init; }));
|
||||
|
||||
auto opIndex(size_t idx)const
|
||||
in
|
||||
{
|
||||
|
@ -1888,6 +1887,7 @@ public alias CodepointSet = InversionList!GcPolicy;
|
|||
*/
|
||||
public struct CodepointInterval
|
||||
{
|
||||
pure:
|
||||
uint[2] _tuple;
|
||||
alias _tuple this;
|
||||
this(uint low, uint high)
|
||||
|
@ -1899,8 +1899,8 @@ public struct CodepointInterval
|
|||
{
|
||||
return this[0] == val[0] && this[1] == val[1];
|
||||
}
|
||||
@property ref uint a(){ return _tuple[0]; }
|
||||
@property ref uint b(){ return _tuple[1]; }
|
||||
@property ref inout(uint) a() inout { return _tuple[0]; }
|
||||
@property ref inout(uint) b() inout { return _tuple[1]; }
|
||||
}
|
||||
|
||||
//@@@BUG another forward reference workaround
|
||||
|
@ -1980,10 +1980,11 @@ public struct CodepointInterval
|
|||
@trusted public struct InversionList(SP=GcPolicy)
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
Construct from another code point set of any type.
|
||||
*/
|
||||
this(Set)(Set set)
|
||||
this(Set)(Set set) pure
|
||||
if(isCodepointSet!Set)
|
||||
{
|
||||
uint[] arr;
|
||||
|
@ -1998,7 +1999,7 @@ public:
|
|||
/**
|
||||
Construct a set from a forward range of code point intervals.
|
||||
*/
|
||||
this(Range)(Range intervals)
|
||||
this(Range)(Range intervals) /*pure */ //@@@BUG@@@ sort is not pure
|
||||
if(isForwardRange!Range && isIntegralPair!(ElementType!Range))
|
||||
{
|
||||
auto flattened = roundRobin(intervals.save.map!"a[0]"(),
|
||||
|
@ -2008,7 +2009,7 @@ public:
|
|||
}
|
||||
|
||||
//helper function that avoids sanity check to be CTFE-friendly
|
||||
private static fromIntervals(Range)(Range intervals)
|
||||
private static fromIntervals(Range)(Range intervals) pure
|
||||
{
|
||||
auto flattened = roundRobin(intervals.save.map!"a[0]"(),
|
||||
intervals.save.map!"a[1]"());
|
||||
|
@ -2016,6 +2017,23 @@ public:
|
|||
set.data = Uint24Array!(SP)(flattened);
|
||||
return set;
|
||||
}
|
||||
//ditto untill sort is CTFE-able
|
||||
private static fromIntervals()(uint[] intervals...) pure
|
||||
in
|
||||
{
|
||||
assert(intervals.length % 2 == 0, "Odd number of interval bounds [a, b)!");
|
||||
for(uint i=0; i<intervals.length/2; i++)
|
||||
{
|
||||
auto a = intervals[2*i], b = intervals[2*i+1];
|
||||
assert(a < b, text("illegal interval [a, b): ", a, " > ", b));
|
||||
}
|
||||
}
|
||||
body
|
||||
{
|
||||
InversionList set;
|
||||
set.data = Uint24Array!(SP)(intervals);
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
Construct a set from plain values of code point intervals.
|
||||
|
@ -3095,7 +3113,7 @@ void write24(ubyte* ptr, uint val, size_t idx) pure nothrow
|
|||
alias opDollar = length;
|
||||
|
||||
// Read 24-bit packed integer
|
||||
uint opIndex(size_t idx)const
|
||||
uint opIndex(size_t idx) const
|
||||
{
|
||||
return read24(data.ptr, idx);
|
||||
}
|
||||
|
@ -4382,7 +4400,7 @@ public template buildTrie(Value, Key, Args...)
|
|||
}
|
||||
}
|
||||
|
||||
//helper in place of assumeSize to
|
||||
//helper in place of assumeSize to
|
||||
//reduce mangled name & help DMD inline Trie functors
|
||||
struct clamp(size_t bits)
|
||||
{
|
||||
|
@ -4407,19 +4425,19 @@ public struct MatcherConcept
|
|||
{
|
||||
/**
|
||||
$(P Perform a sematic equivalent 2 operations:
|
||||
decoding a $(CODEPOINT) at front of $(D inp) and testing if
|
||||
decoding a $(CODEPOINT) at front of $(D inp) and testing if
|
||||
it belongs to the set of $(CODEPOINTS) of this matcher. )
|
||||
|
||||
$(P The effect on $(D inp) depends on the kind of function called:)
|
||||
|
||||
$(P Match. If the codepoint is found in the set then range $(D inp)
|
||||
is advanced by its size in $(S_LINK Code unit, code units),
|
||||
is advanced by its size in $(S_LINK Code unit, code units),
|
||||
otherwise the range is not modifed.)
|
||||
|
||||
$(P Skip. The range is always advanced by the size
|
||||
of the tested $(CODEPOINT) regardless of the result of test.)
|
||||
|
||||
$(P Test. The range is left unaffected regardless
|
||||
$(P Test. The range is left unaffected regardless
|
||||
of the result of test.)
|
||||
*/
|
||||
public bool match(Range)(ref Range inp)
|
||||
|
@ -4445,7 +4463,7 @@ public struct MatcherConcept
|
|||
public unittest
|
||||
{
|
||||
string truth = "2² = 4";
|
||||
auto m = utfMatcher!char(unicode.Number);
|
||||
auto m = utfMatcher!char(unicode.Number);
|
||||
assert(m.match(truth)); // '2' is a number all right
|
||||
assert(truth == "² = 4"); // skips on match
|
||||
assert(m.match(truth)); // so is the superscript '2'
|
||||
|
@ -4458,17 +4476,17 @@ public struct MatcherConcept
|
|||
}
|
||||
/**
|
||||
Advanced feature - provide direct access to a subset of matcher based a
|
||||
set of known encoding lengths. Lengths are provided in
|
||||
$(S_LINK Code unit, code units). The sub-matcher then may do less
|
||||
set of known encoding lengths. Lengths are provided in
|
||||
$(S_LINK Code unit, code units). The sub-matcher then may do less
|
||||
operations per any $(D test)/$(D match).
|
||||
|
||||
Use with care as the sub-matcher won't match
|
||||
any $(CODEPOINTS) that have encoded length that doesn't belong
|
||||
to the selected set of lengths. Also the sub-matcher object references
|
||||
the parent matcher and must not be used past the liftetime
|
||||
Use with care as the sub-matcher won't match
|
||||
any $(CODEPOINTS) that have encoded length that doesn't belong
|
||||
to the selected set of lengths. Also the sub-matcher object references
|
||||
the parent matcher and must not be used past the liftetime
|
||||
of the latter.
|
||||
|
||||
Another caveat of using sub-matcher is that skip is not available
|
||||
Another caveat of using sub-matcher is that skip is not available
|
||||
preciesly because sub-matcher doesn't detect all lengths.
|
||||
*/
|
||||
@property auto subMatcher(Lengths...)()
|
||||
|
@ -4481,7 +4499,7 @@ public struct MatcherConcept
|
|||
public unittest
|
||||
{
|
||||
auto m = utfMatcher!char(unicode.Number);
|
||||
string square = "2²";
|
||||
string square = "2²";
|
||||
// about sub-matchers
|
||||
assert(!m.subMatcher!(2,3,4).test(square)); // ASCII no covered
|
||||
assert(m.subMatcher!1.match(square)); // ASCII-only, works
|
||||
|
@ -4500,7 +4518,7 @@ public struct MatcherConcept
|
|||
/**
|
||||
Test if $(D M) is a Matcher for ranges of $(D Char).
|
||||
*/
|
||||
public enum isMatcher(M, C) = __traits(compiles, (){
|
||||
public enum isMatcher(M, C) = __traits(compiles, (){
|
||||
C[] s;
|
||||
auto d = s.decoder;
|
||||
M m;
|
||||
|
@ -4527,7 +4545,7 @@ enum Mode {
|
|||
};
|
||||
|
||||
mixin template ForwardStrings()
|
||||
{
|
||||
{
|
||||
private bool fwdStr(string fn, C)(ref C[] str) const pure
|
||||
{
|
||||
alias type = typeof(units(str));
|
||||
|
@ -4548,10 +4566,10 @@ template Utf8Matcher()
|
|||
//for 1-stage ASCII
|
||||
alias AsciiSpec = TypeTuple!(bool, char, clamp!7);
|
||||
//for 2-stage lookup of 2 byte UTF-8 sequences
|
||||
alias Utf8Spec2 = TypeTuple!(bool, char[2],
|
||||
alias Utf8Spec2 = TypeTuple!(bool, char[2],
|
||||
clampIdx!(0, 5), clampIdx!(1, 6));
|
||||
//ditto for 3 byte
|
||||
alias Utf8Spec3 = TypeTuple!(bool, char[3],
|
||||
alias Utf8Spec3 = TypeTuple!(bool, char[3],
|
||||
clampIdx!(0, 4),
|
||||
clampIdx!(1, 6),
|
||||
clampIdx!(2, 6)
|
||||
|
@ -4571,7 +4589,7 @@ template Utf8Matcher()
|
|||
|
||||
enum leadMask(size_t size) = (cast(size_t)1<<(7 - size))-1;
|
||||
enum encMask(size_t size) = ((1<<size)-1)<<(8-size);
|
||||
|
||||
|
||||
char trancate()(char ch) pure @safe
|
||||
{
|
||||
assert((ch & 0b1100_0000) == 0x80);
|
||||
|
@ -4585,7 +4603,7 @@ template Utf8Matcher()
|
|||
char[4] buf;
|
||||
std.utf.encode(buf, ch);
|
||||
char[sz] ret;
|
||||
buf[0] &= leadMask!sz;
|
||||
buf[0] &= leadMask!sz;
|
||||
foreach(n; 1..sz)
|
||||
buf[n] = buf[n] & 0x3f; //keep 6 lower bits
|
||||
ret[] = buf[0..sz];
|
||||
|
@ -4613,7 +4631,7 @@ template Utf8Matcher()
|
|||
import std.string : format;
|
||||
enum hasASCII = staticIndexOf!(1, Sizes) >= 0;
|
||||
alias UniSizes = Erase!(1, Sizes);
|
||||
|
||||
|
||||
//generate dispatch code sequence for unicode parts
|
||||
static auto genDispatch()
|
||||
{
|
||||
|
@ -4621,7 +4639,7 @@ template Utf8Matcher()
|
|||
foreach(size; UniSizes)
|
||||
code ~= format(q{
|
||||
(ch & ~leadMask!%d) == encMask!(%d)
|
||||
? lookup!(%d, mode)(inp) :
|
||||
? lookup!(%d, mode)(inp) :
|
||||
}, size, size, size);
|
||||
code ~= "false";
|
||||
return code;
|
||||
|
@ -4692,9 +4710,9 @@ template Utf8Matcher()
|
|||
|
||||
struct Impl(Sizes...)
|
||||
{
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
"Only lengths of 1, 2, 3 and 4 code unit are possible for UTF-8");
|
||||
private:
|
||||
private:
|
||||
//pick tables for chosen sizes
|
||||
alias OurTabs = staticMap!(Table, Sizes);
|
||||
OurTabs tables;
|
||||
|
@ -4711,7 +4729,7 @@ template Utf8Matcher()
|
|||
{
|
||||
import std.typecons;
|
||||
if(inp.length < size)
|
||||
return badEncoding(), false;
|
||||
return badEncoding(), false;
|
||||
char[size] needle = void;
|
||||
needle[0] = leadMask!size & inp[0];
|
||||
foreach(i; staticIota!(1, size))
|
||||
|
@ -4737,7 +4755,7 @@ template Utf8Matcher()
|
|||
|
||||
struct CherryPick(I, Sizes...)
|
||||
{
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
"Only lengths of 1, 2, 3 and 4 code unit are possible for UTF-8");
|
||||
private:
|
||||
I* m;
|
||||
|
@ -4761,17 +4779,17 @@ template Utf16Matcher()
|
|||
}
|
||||
|
||||
alias Seq = TypeTuple;
|
||||
//1-stage ASCII
|
||||
//1-stage ASCII
|
||||
alias AsciiSpec = Seq!(bool, wchar, clamp!7);
|
||||
//2-stage BMP
|
||||
alias BmpSpec = Seq!(bool, wchar, sliceBits!(7, 16), sliceBits!(0, 7));
|
||||
//4-stage - full Unicode
|
||||
//assume that 0xD800 & 0xDC00 bits are cleared
|
||||
//thus leaving 10 bit per wchar to worry about
|
||||
alias UniSpec = Seq!(bool, wchar[2],
|
||||
alias UniSpec = Seq!(bool, wchar[2],
|
||||
assumeSize!(x=>x[0]>>4, 6), assumeSize!(x=>x[0]&0xf, 4),
|
||||
assumeSize!(x=>x[1]>>6, 4), assumeSize!(x=>x[1]&0x3f, 6),
|
||||
);
|
||||
assumeSize!(x=>x[1]>>6, 4), assumeSize!(x=>x[1]&0x3f, 6),
|
||||
);
|
||||
alias Ascii = typeof(TrieBuilder!(AsciiSpec)(false).build());
|
||||
alias Bmp = typeof(TrieBuilder!(BmpSpec)(false).build());
|
||||
alias Uni = typeof(TrieBuilder!(UniSpec)(false).build());
|
||||
|
@ -4790,8 +4808,8 @@ template Utf16Matcher()
|
|||
auto build(Set)(Set set)
|
||||
{
|
||||
auto ascii = set & unicode.ASCII;
|
||||
auto bmp = (set & CodepointSet(0x80, 0xFFFF+1))
|
||||
- CodepointSet(0xD800, 0xDFFF+1);
|
||||
auto bmp = (set & CodepointSet.fromIntervals(0x80, 0xFFFF+1))
|
||||
- CodepointSet.fromIntervals(0xD800, 0xDFFF+1);
|
||||
auto other = set - (bmp | ascii);
|
||||
auto asciiT = ascii.byCodepoint.map!(x=>cast(char)x).buildTrie!(AsciiSpec);
|
||||
auto bmpT = bmp.byCodepoint.map!(x=>cast(wchar)x).buildTrie!(BmpSpec);
|
||||
|
@ -4826,7 +4844,7 @@ template Utf16Matcher()
|
|||
assert(!inp.empty);
|
||||
auto ch = inp[0];
|
||||
static if(sizeFlags & 1)
|
||||
return ch < 0x80 ? (inp.popFront(), ascii[ch])
|
||||
return ch < 0x80 ? (inp.popFront(), ascii[ch])
|
||||
: lookupUni!mode(inp);
|
||||
else
|
||||
return lookupUni!mode(inp);
|
||||
|
@ -4862,7 +4880,7 @@ template Utf16Matcher()
|
|||
{
|
||||
return fwdStr!"test"(str);
|
||||
}
|
||||
|
||||
|
||||
mixin ForwardStrings; //dispatch strings to range versions
|
||||
}
|
||||
|
||||
|
@ -4870,7 +4888,7 @@ template Utf16Matcher()
|
|||
if(Sizes.length >= 1 && Sizes.length <= 2)
|
||||
{
|
||||
private:
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
"Only lengths of 1 and 2 code units are possible in UTF-16");
|
||||
static if(Sizes.length > 1)
|
||||
enum sizeFlags = Sizes[0] | Sizes[1];
|
||||
|
@ -4880,7 +4898,7 @@ template Utf16Matcher()
|
|||
static if(sizeFlags & 1)
|
||||
{
|
||||
Ascii ascii;
|
||||
Bmp bmp;
|
||||
Bmp bmp;
|
||||
}
|
||||
static if(sizeFlags & 2)
|
||||
{
|
||||
|
@ -4894,7 +4912,7 @@ template Utf16Matcher()
|
|||
}
|
||||
|
||||
bool lookupUni(Mode mode, Range)(ref Range inp) const pure
|
||||
{
|
||||
{
|
||||
wchar x = cast(wchar)(inp[0] - 0xD800);
|
||||
//not a high surrogate
|
||||
if(x > 0x3FF)
|
||||
|
@ -4902,7 +4920,7 @@ template Utf16Matcher()
|
|||
static if(sizeFlags & 1)
|
||||
{
|
||||
auto ch = inp[0];
|
||||
static if(mode == Mode.alwaysSkip)
|
||||
static if(mode == Mode.alwaysSkip)
|
||||
inp.popFront();
|
||||
static if(mode == Mode.skipOnMatch)
|
||||
return bmp[ch] && (inp.popFront(), true);
|
||||
|
@ -4915,9 +4933,9 @@ template Utf16Matcher()
|
|||
else
|
||||
{
|
||||
static if(sizeFlags & 2)
|
||||
{
|
||||
{
|
||||
if(inp.length < 2)
|
||||
badEncoding();
|
||||
badEncoding();
|
||||
wchar y = cast(wchar)(inp[1] - 0xDC00);
|
||||
//not a low surrogate
|
||||
if(y > 0x3FF)
|
||||
|
@ -4953,24 +4971,24 @@ template Utf16Matcher()
|
|||
return m.lookupUni!mode(inp);
|
||||
}
|
||||
mixin DefMatcher;
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
static assert(allSatisfy!(validSize, Sizes),
|
||||
"Only lengths of 1 and 2 code units are possible in UTF-16");
|
||||
}
|
||||
}
|
||||
|
||||
private auto utf8Matcher(Set)(Set set)
|
||||
{
|
||||
return Utf8Matcher!().build(set);
|
||||
{
|
||||
return Utf8Matcher!().build(set);
|
||||
}
|
||||
|
||||
private auto utf16Matcher(Set)(Set set)
|
||||
{
|
||||
return Utf16Matcher!().build(set);
|
||||
return Utf16Matcher!().build(set);
|
||||
}
|
||||
|
||||
/**
|
||||
Constructs a matcher object
|
||||
to classify $(CODEPOINTS) from the $(D set) for encoding
|
||||
to classify $(CODEPOINTS) from the $(D set) for encoding
|
||||
that has $(D Char) as code unit.
|
||||
|
||||
See $(LREF MatcherConcept) for API outline.
|
||||
|
@ -4983,15 +5001,15 @@ public auto utfMatcher(Char, Set)(Set set)
|
|||
else static if(is(Char : wchar))
|
||||
return utf16Matcher(set);
|
||||
else static if(is(Char : dchar))
|
||||
static assert(false, "UTF-32 needs no decoding,
|
||||
static assert(false, "UTF-32 needs no decoding,
|
||||
and thus not supported by utfMatcher");
|
||||
else
|
||||
static assert(false, "Only character types 'char' and 'wchar' are allowed");
|
||||
static assert(false, "Only character types 'char' and 'wchar' are allowed");
|
||||
}
|
||||
|
||||
|
||||
//a range of code units, packed with index to speed up forward iteration
|
||||
public auto decoder(C)(C[] s, size_t offset=0)
|
||||
package auto decoder(C)(C[] s, size_t offset=0)
|
||||
if(is(C : wchar) || is(C : char))
|
||||
{
|
||||
static struct Decoder
|
||||
|
@ -5000,7 +5018,7 @@ public auto decoder(C)(C[] s, size_t offset=0)
|
|||
C[] str;
|
||||
size_t idx;
|
||||
@property C front(){ return str[idx]; }
|
||||
@property C back(){ return str[$-1]; }
|
||||
@property C back(){ return str[$-1]; }
|
||||
void popFront(){ idx++; }
|
||||
void popBack(){ str = str[0..$-1]; }
|
||||
void popFrontN(size_t n){ idx += n; }
|
||||
|
@ -5017,10 +5035,10 @@ public auto decoder(C)(C[] s, size_t offset=0)
|
|||
}
|
||||
|
||||
/*
|
||||
Expose UTF string $(D s) as a random-access
|
||||
Expose UTF string $(D s) as a random-access
|
||||
range of $(S_LINK Code unit, code units).
|
||||
*/
|
||||
public auto units(C)(C[] s)
|
||||
package auto units(C)(C[] s)
|
||||
if(is(C : wchar) || is(C : char))
|
||||
{
|
||||
static struct Units
|
||||
|
@ -5028,7 +5046,7 @@ public auto units(C)(C[] s)
|
|||
pure nothrow:
|
||||
C[] str;
|
||||
@property C front(){ return str[0]; }
|
||||
@property C back(){ return str[$-1]; }
|
||||
@property C back(){ return str[$-1]; }
|
||||
void popFront(){ str = str[1..$]; }
|
||||
void popBack(){ str = str[0..$-1]; }
|
||||
void popFrontN(size_t n){ str = str[n..$]; }
|
||||
|
@ -5088,7 +5106,7 @@ unittest
|
|||
assert(codec.idx == 2);
|
||||
assert(!utf8.skip(codec));
|
||||
assert(!utf8.skip(codec));
|
||||
|
||||
|
||||
foreach(i; 0..7)
|
||||
{
|
||||
assert(!asc.test(codec));
|
||||
|
@ -5136,9 +5154,9 @@ unittest
|
|||
assert(testAll(utf16, c16));
|
||||
assert(testAll(bmp, c16) || len16 != 1);
|
||||
assert(testAll(nonBmp, c16) || len16 != 2);
|
||||
|
||||
|
||||
assert(testAll(utf8, c8));
|
||||
|
||||
|
||||
//submatchers return false on out of their domain
|
||||
assert(testAll(ascii, c8) || len != 1);
|
||||
assert(testAll(uni2, c8) || len != 2);
|
||||
|
@ -5562,13 +5580,14 @@ unittest
|
|||
}
|
||||
|
||||
// Creates a range of $(D CodepointInterval) that lazily decodes compressed data.
|
||||
@safe package auto decompressIntervals(const(ubyte)[] data)
|
||||
@safe package auto decompressIntervals(const(ubyte)[] data) pure
|
||||
{
|
||||
return DecompressedIntervals(data);
|
||||
}
|
||||
|
||||
@trusted struct DecompressedIntervals
|
||||
{
|
||||
pure:
|
||||
const(ubyte)[] _stream;
|
||||
size_t _idx;
|
||||
CodepointInterval _front;
|
||||
|
@ -5620,7 +5639,7 @@ else
|
|||
{
|
||||
|
||||
// helper for looking up code point sets
|
||||
@trusted ptrdiff_t findUnicodeSet(alias table, C)(in C[] name)
|
||||
@trusted ptrdiff_t findUnicodeSet(alias table, C)(in C[] name) pure
|
||||
{
|
||||
auto range = assumeSorted!((a,b) => propertyNameLess(a,b))
|
||||
(table.map!"a.name"());
|
||||
|
@ -5631,7 +5650,7 @@ else
|
|||
}
|
||||
|
||||
// another one that loads it
|
||||
@trusted bool loadUnicodeSet(alias table, Set, C)(in C[] name, ref Set dest)
|
||||
@trusted bool loadUnicodeSet(alias table, Set, C)(in C[] name, ref Set dest) pure
|
||||
{
|
||||
auto idx = findUnicodeSet!table(name);
|
||||
if(idx >= 0)
|
||||
|
@ -5643,7 +5662,7 @@ else
|
|||
}
|
||||
|
||||
@trusted bool loadProperty(Set=CodepointSet, C)
|
||||
(in C[] name, ref Set target)
|
||||
(in C[] name, ref Set target) pure
|
||||
{
|
||||
alias ucmp = comparePropertyName;
|
||||
// conjure cumulative properties by hand
|
||||
|
@ -5731,9 +5750,9 @@ else
|
|||
target |= asSet(uniProps.So);
|
||||
}
|
||||
else if(ucmp(name, "any") == 0)
|
||||
target = Set(0,0x110000);
|
||||
target = Set.fromIntervals(0, 0x110000);
|
||||
else if(ucmp(name, "ascii") == 0)
|
||||
target = Set(0,0x80);
|
||||
target = Set.fromIntervals(0, 0x80);
|
||||
else
|
||||
return loadUnicodeSet!(uniProps.tab)(name, target);
|
||||
return true;
|
||||
|
@ -5837,7 +5856,7 @@ template SetSearcher(alias table, string kind)
|
|||
---
|
||||
*/
|
||||
|
||||
static @property auto opDispatch(string name)()
|
||||
static @property auto opDispatch(string name)() pure
|
||||
{
|
||||
static if(findAny(name))
|
||||
return loadAny(name);
|
||||
|
@ -5943,7 +5962,7 @@ private:
|
|||
|| (ucmp(name[0..2],"In") == 0 && findSetName!(blocks.tab)(name[2..$]));
|
||||
}
|
||||
|
||||
static auto loadAny(Set=CodepointSet, C)(in C[] name)
|
||||
static auto loadAny(Set=CodepointSet, C)(in C[] name) pure
|
||||
{
|
||||
Set set;
|
||||
bool loaded = loadProperty(name, set) || loadUnicodeSet!(scripts.tab)(name, set)
|
||||
|
@ -8586,7 +8605,7 @@ private:
|
|||
// load static data from pre-generated tables into usable datastructures
|
||||
|
||||
|
||||
@safe auto asSet(const (ubyte)[] compressed)
|
||||
@safe auto asSet(const (ubyte)[] compressed) pure
|
||||
{
|
||||
return CodepointSet.fromIntervals(decompressIntervals(compressed));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue