Introduce package level canSearchInCodeUnits

And use it to solve bugs/simplify code.
This commit is contained in:
monarchdodra 2013-08-25 11:10:21 +02:00
parent ec25c68f47
commit ed1cfe527b
3 changed files with 54 additions and 17 deletions

View file

@ -3738,7 +3738,7 @@ if (isInputRange!InputRange &&
//Note: "needle <= 0x7F" properly handles sign via unsigned promotion
static if (is(UEEType == char))
{
if (!__ctfe && needle <= 0x7F)
if (!__ctfe && canSearchInCodeUnits!char(needle))
{
static R trustedMemchr(ref R haystack, ref E needle) @trusted nothrow pure
{
@ -3754,7 +3754,7 @@ if (isInputRange!InputRange &&
//Ditto, but for UTF16
static if (is(UEEType == wchar))
{
if (needle <= 0xD7FF || (0xE000 <= needle && needle <= 0xFFFF))
if (canSearchInCodeUnits!wchar(needle))
{
foreach (i, ref EEType e; haystack)
{

View file

@ -575,7 +575,7 @@ ptrdiff_t lastIndexOf(Char)(const(Char)[] s,
{
if (cs == CaseSensitive.yes)
{
if (std.ascii.isASCII(c))
if (canSearchInCodeUnits!Char(c))
{
foreach_reverse (i, it; s)
{
@ -1565,9 +1565,9 @@ unittest
S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
if (isSomeString!S)
{
alias typeof(s[0]) C;
alias C = ElementEncodingType!S;
if (cast(dchar)(cast(C)fillChar) == fillChar)
if (canSearchInCodeUnits!C(fillChar))
{
immutable len = s.walkLength();
if (len >= width)
@ -1600,9 +1600,9 @@ S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
if (isSomeString!S)
{
alias typeof(s[0]) C;
alias C = ElementEncodingType!S;
if (cast(dchar)(cast(C)fillChar) == fillChar)
if (canSearchInCodeUnits!C(fillChar))
{
immutable len = s.walkLength();
if (len >= width)
@ -1635,9 +1635,9 @@ S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
S center(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
if (isSomeString!S)
{
alias typeof(s[0]) C;
alias C = ElementEncodingType!S;
if (cast(dchar)(cast(C)fillChar) == fillChar)
if (canSearchInCodeUnits!C(fillChar))
{
immutable len = s.walkLength();
if (len >= width)
@ -1690,6 +1690,10 @@ unittest
assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");
assert(leftJustify(s, 8, 'ö') == "helloööö");
assert(rightJustify(s, 8, 'ö') == "öööhello");
assert(center(s, 8, 'ö') == "öhelloöö");
}
});
}
@ -1746,10 +1750,7 @@ S detab(S)(S s, size_t tabSize = 8) @trusted pure
L1:
if (changes)
{
if (cast(dchar)(cast(C)c) == c)
result ~= cast(C)c;
else
std.utf.encode(result, c);
std.utf.encode(result, c);
}
break;
}
@ -1870,10 +1871,7 @@ S entab(S)(S s, size_t tabSize = 8) @trusted pure
}
if (changes)
{
if (cast(dchar)(cast(C)c) == c)
result ~= cast(C)c;
else
std.utf.encode(result, c);
std.utf.encode(result, c);
}
}

View file

@ -1952,6 +1952,45 @@ unittest
});
}
/+
    Internal helper function:
    Tells whether the code point $(D c) can be looked for directly among the
    code units of a string whose encoding type is $(D C), i.e. without
    decoding the string first.

    This is a runtime check that is used as an optimization in various
    functions, particularly in $(D std.string).

    Params:
        C = the code unit type of the string being searched
            (must satisfy $(D isSomeChar)).
        c = the code point being searched for.

    Returns:
        $(D true) if comparing $(D c) against individual code units of type
        $(D C) is safe, $(D false) if the string has to be decoded.
+/
package bool canSearchInCodeUnits(C)(dchar c)
    if (isSomeChar!C)
{
    static if (C.sizeof == 4)
    {
        // 4-byte code units: every code point is accepted.
        return true;
    }
    else static if (C.sizeof == 2)
    {
        // 2-byte code units: accept anything up to 0xFFFF except the
        // surrogate range 0xD800 .. 0xDFFF.
        return c <= 0xFFFF && (c < 0xD800 || 0xDFFF < c);
    }
    else static if (C.sizeof == 1)
    {
        // 1-byte code units: only the ASCII range is accepted.
        return c < 0x80;
    }
    else
    {
        static assert(0);
    }
}
unittest
{
    // One shorthand per encoding so the cases can be grouped by code unit type.
    alias canSearchInCodeUnits!char  inChars;
    alias canSearchInCodeUnits!wchar inWchars;
    alias canSearchInCodeUnits!dchar inDchars;

    // char: only 'a' is accepted.
    assert( inChars('a'));
    assert(!inChars('ö'));            //Important test: ö <= 0xFF, but it is not ASCII
    assert(!inChars(cast(char)'ö'));  //Important test: same value, passed as a char
    assert(!inChars('日'));
    assert(!inChars('\U00010001'));

    // wchar: 0xDA00 (a surrogate) and '\U00010001' (above 0xFFFF) are rejected.
    assert( inWchars('a'));
    assert( inWchars('ö'));
    assert( inWchars('日'));
    assert(!inWchars(cast(wchar)0xDA00));
    assert(!inWchars('\U00010001'));

    // dchar: everything is accepted.
    assert( inDchars('a'));
    assert( inDchars('ö'));
    assert( inDchars('日'));
    assert( inDchars(cast(dchar)0xDA00));
    assert( inDchars('\U00010001'));
}
/* =================== Validation ======================= */