diff --git a/std/algorithm.d b/std/algorithm.d index bb1417af5..d1067d3f3 100644 --- a/std/algorithm.d +++ b/std/algorithm.d @@ -3738,7 +3738,7 @@ if (isInputRange!InputRange && //Note: "needle <= 0x7F" properly handles sign via unsigned promotion static if (is(UEEType == char)) { - if (!__ctfe && needle <= 0x7F) + if (!__ctfe && canSearchInCodeUnits!char(needle)) { static R trustedMemchr(ref R haystack, ref E needle) @trusted nothrow pure { @@ -3754,7 +3754,7 @@ if (isInputRange!InputRange && //Ditto, but for UTF16 static if (is(UEEType == wchar)) { - if (needle <= 0xD7FF || (0xE000 <= needle && needle <= 0xFFFF)) + if (canSearchInCodeUnits!wchar(needle)) { foreach (i, ref EEType e; haystack) { diff --git a/std/string.d b/std/string.d index 1ed13ca2e..ed9767ddd 100644 --- a/std/string.d +++ b/std/string.d @@ -575,7 +575,7 @@ ptrdiff_t lastIndexOf(Char)(const(Char)[] s, { if (cs == CaseSensitive.yes) { - if (std.ascii.isASCII(c)) + if (canSearchInCodeUnits!Char(c)) { foreach_reverse (i, it; s) { @@ -1565,9 +1565,9 @@ unittest S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure if (isSomeString!S) { - alias typeof(s[0]) C; + alias C = ElementEncodingType!S; - if (cast(dchar)(cast(C)fillChar) == fillChar) + if (canSearchInCodeUnits!C(fillChar)) { immutable len = s.walkLength(); if (len >= width) @@ -1600,9 +1600,9 @@ S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure if (isSomeString!S) { - alias typeof(s[0]) C; + alias C = ElementEncodingType!S; - if (cast(dchar)(cast(C)fillChar) == fillChar) + if (canSearchInCodeUnits!C(fillChar)) { immutable len = s.walkLength(); if (len >= width) @@ -1635,9 +1635,9 @@ S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure S center(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure if (isSomeString!S) { - alias typeof(s[0]) C; + alias C = ElementEncodingType!S; - if 
(cast(dchar)(cast(C)fillChar) == fillChar) + if (canSearchInCodeUnits!C(fillChar)) { immutable len = s.walkLength(); if (len >= width) @@ -1690,6 +1690,10 @@ unittest assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100"); assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello"); assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100"); + + assert(leftJustify(s, 8, 'ö') == "helloööö"); + assert(rightJustify(s, 8, 'ö') == "öööhello"); + assert(center(s, 8, 'ö') == "öhelloöö"); } }); } @@ -1746,10 +1750,7 @@ S detab(S)(S s, size_t tabSize = 8) @trusted pure L1: if (changes) { - if (cast(dchar)(cast(C)c) == c) - result ~= cast(C)c; - else - std.utf.encode(result, c); + std.utf.encode(result, c); } break; } @@ -1870,10 +1871,7 @@ S entab(S)(S s, size_t tabSize = 8) @trusted pure } if (changes) { - if (cast(dchar)(cast(C)c) == c) - result ~= cast(C)c; - else - std.utf.encode(result, c); + std.utf.encode(result, c); } } diff --git a/std/utf.d b/std/utf.d index 3f8295969..071edecd2 100644 --- a/std/utf.d +++ b/std/utf.d @@ -1952,6 +1952,45 @@ unittest }); } +/+ +Internal helper function: + +Returns true if it is safe to search for the Codepoint $(D c) inside +code units, without decoding. + +This is a runtime check that is used as an optimization in various functions, +particularly, in $(D std.string). + / +package bool canSearchInCodeUnits(C)(dchar c) +if (isSomeChar!C) +{ + static if (C.sizeof == 1) + return c <= 0x7F; /* 1-byte code units: true only for c <= 0x7F, so 'ö' (although <= 0xFF) is rejected */ + else static if (C.sizeof == 2) + return c <= 0xD7FF || (0xE000 <= c && c <= 0xFFFF); /* 2-byte code units: c must fit in 16 bits and lie outside 0xD800..0xDFFF (the UTF-16 surrogate range) */ + else static if (C.sizeof == 4) + return true; /* 4-byte code units: every c is accepted */ + else + static assert(0); +} +unittest +{ + assert( canSearchInCodeUnits! char('a')); + assert( canSearchInCodeUnits!wchar('a')); + assert( canSearchInCodeUnits!dchar('a')); + assert(!canSearchInCodeUnits! char('ö')); //Important test: ö <= 0xFF + assert(!canSearchInCodeUnits! 
char(cast(char)'ö')); //Important test: ö <= 0xFF + assert( canSearchInCodeUnits!wchar('ö')); + assert( canSearchInCodeUnits!dchar('ö')); + assert(!canSearchInCodeUnits! char('日')); + assert( canSearchInCodeUnits!wchar('日')); + assert( canSearchInCodeUnits!dchar('日')); + assert(!canSearchInCodeUnits!wchar(cast(wchar)0xDA00)); + assert( canSearchInCodeUnits!dchar(cast(dchar)0xDA00)); + assert(!canSearchInCodeUnits! char('\U00010001')); + assert(!canSearchInCodeUnits!wchar('\U00010001')); + assert( canSearchInCodeUnits!dchar('\U00010001')); +} /* =================== Validation ======================= */