Fix issue 12455 [reg] Bad lowercase mapping for 'LATIN CAPITAL LETTER I WITH DOT ABOVE'

Also, as part of the fix, restores a test case in string.d
to exactly match the older behaviour.

Some extended Greek characters convert under toUpper to a title-case letter rather than an upper-case one, so the toUpper unittest now accepts title-case results as well.
This commit is contained in:
Dmitry Olshansky 2014-07-05 01:33:35 +04:00
parent 520c535789
commit c131da5834
3 changed files with 43 additions and 25 deletions

File diff suppressed because one or more lines are too long

View file

@ -1496,7 +1496,7 @@ unittest
assert(s2 !is s1); assert(s2 !is s1);
s1 = to!S("\u0131 \u0130"); s1 = to!S("\u0131 \u0130");
s2 = capitalize(s1); s2 = capitalize(s1);
assert(cmp(s2, "I \u0130") == 0); assert(cmp(s2, "\u0049 \u0069") == 0);
assert(s2 !is s1); assert(s2 !is s1);
s1 = to!S("\u017F \u0049"); s1 = to!S("\u017F \u0049");

View file

@ -7861,12 +7861,14 @@ else
// trusted -> avoid bounds check // trusted -> avoid bounds check
@trusted pure nothrow @trusted pure nothrow
ushort toLowerIndex(dchar c) ushort indexLookup(alias trie)(dchar c)
{ {
alias trie = toLowerIndexTrie;
return trie[c]; return trie[c];
} }
alias toLowerIndex = indexLookup!toLowerIndexTrie;
alias toLowerSimpleIndex = indexLookup!toLowerSimpleIndexTrie;
// trusted -> avoid bounds check // trusted -> avoid bounds check
@trusted pure nothrow @trusted pure nothrow
dchar toLowerTab(size_t idx) dchar toLowerTab(size_t idx)
@ -7874,13 +7876,8 @@ dchar toLowerTab(size_t idx)
return toLowerTable[idx]; return toLowerTable[idx];
} }
// trusted -> avoid bounds check alias toTitleIndex = indexLookup!toTitleIndexTrie;
@trusted pure nothrow alias toTitleSimpleIndex = indexLookup!toTitleSimpleIndexTrie;
ushort toTitleIndex(dchar c)
{
alias trie = toTitleIndexTrie;
return trie[c];
}
// trusted -> avoid bounds check // trusted -> avoid bounds check
@trusted pure nothrow @trusted pure nothrow
@ -7889,13 +7886,8 @@ dchar toTitleTab(size_t idx)
return toTitleTable[idx]; return toTitleTable[idx];
} }
// trusted -> avoid bounds check alias toUpperIndex = indexLookup!toUpperIndexTrie;
@trusted pure nothrow alias toUpperSimpleIndex = indexLookup!toUpperSimpleIndexTrie;
ushort toUpperIndex(dchar c)
{
alias trie = toUpperIndexTrie;
return trie[c];
}
// trusted -> avoid bounds check // trusted -> avoid bounds check
@trusted pure nothrow @trusted pure nothrow
@ -7997,8 +7989,8 @@ dchar toLower(dchar c)
return c + 32; return c + 32;
return c; return c;
} }
size_t idx = toLowerIndex(c); size_t idx = toLowerSimpleIndex(c);
if(idx < MAX_SIMPLE_LOWER) if(idx != ushort.max)
{ {
return toLowerTab(idx); return toLowerTab(idx);
} }
@ -8019,8 +8011,8 @@ private dchar toTitlecase(dchar c)
return c - 32; return c - 32;
return c; return c;
} }
size_t idx = toTitleIndex(c); size_t idx = toTitleSimpleIndex(c);
if(idx < MAX_SIMPLE_TITLE) if(idx != ushort.max)
{ {
return toTitleTab(idx); return toTitleTab(idx);
} }
@ -8454,6 +8446,15 @@ unittest
// Test on wchar and dchar strings. // Test on wchar and dchar strings.
assert(toLower("Some String"w) == "some string"w); assert(toLower("Some String"w) == "some string"w);
assert(toLower("Some String"d) == "some string"d); assert(toLower("Some String"d) == "some string"d);
// bugzilla 12455
dchar c = 'İ'; // '\U0130' LATIN CAPITAL LETTER I WITH DOT ABOVE
assert(isUpper(c));
assert(toLower(c) == 'i');
// extend on 12455 report - check simple-case toUpper too
c = '\u1f87';
assert(isLower(c));
assert(toUpper(c) == '\u1F8F');
} }
@ -8477,8 +8478,8 @@ dchar toUpper(dchar c)
return c - 32; return c - 32;
return c; return c;
} }
size_t idx = toUpperIndex(c); size_t idx = toUpperSimpleIndex(c);
if(idx < MAX_SIMPLE_UPPER) if(idx != ushort.max)
{ {
return toUpperTab(idx); return toUpperTab(idx);
} }
@ -8492,10 +8493,12 @@ dchar toUpper(dchar c)
assert(std.ascii.toUpper(ch) == toUpper(ch)); assert(std.ascii.toUpper(ch) == toUpper(ch));
assert(toUpper('я') == 'Я'); assert(toUpper('я') == 'Я');
assert(toUpper('δ') == 'Δ'); assert(toUpper('δ') == 'Δ');
auto title = unicode.Titlecase_Letter;
foreach(ch; unicode.lowerCase.byCodepoint) foreach(ch; unicode.lowerCase.byCodepoint)
{ {
dchar up = ch.toUpper(); dchar up = ch.toUpper();
assert(up == ch || isUpper(up), format("%s -> %s", ch, up)); assert(up == ch || isUpper(up) || title[up],
format("%x -> %x", ch, up));
} }
} }
@ -8994,7 +8997,10 @@ private:
auto toUpperIndexTrie() { static immutable res = asTrie(toUpperIndexTrieEntries); return res; } auto toUpperIndexTrie() { static immutable res = asTrie(toUpperIndexTrieEntries); return res; }
auto toLowerIndexTrie() { static immutable res = asTrie(toLowerIndexTrieEntries); return res; } auto toLowerIndexTrie() { static immutable res = asTrie(toLowerIndexTrieEntries); return res; }
auto toTitleIndexTrie() { static immutable res = asTrie(toTitleIndexTrieEntries); return res; } auto toTitleIndexTrie() { static immutable res = asTrie(toTitleIndexTrieEntries); return res; }
//simple case conversion tables
auto toUpperSimpleIndexTrie() { static immutable res = asTrie(toUpperSimpleIndexTrieEntries); return res; }
auto toLowerSimpleIndexTrie() { static immutable res = asTrie(toLowerSimpleIndexTrieEntries); return res; }
auto toTitleSimpleIndexTrie() { static immutable res = asTrie(toTitleSimpleIndexTrieEntries); return res; }
} }