mirror of
https://github.com/dlang/phobos.git
synced 2025-05-02 08:00:48 +03:00
Fix for issue# 8890: commonPrefix does not handle unicode correctly.
It was returning partial code points if the first few code units in a code point matched but not the entire code point.
This commit is contained in:
parent
88052ce7a8
commit
dd5eb5ff97
1 changed file with 97 additions and 15 deletions
112
std/algorithm.d
112
std/algorithm.d
|
@ -317,7 +317,7 @@ module std.algorithm;
|
||||||
import std.c.string, core.bitop;
|
import std.c.string, core.bitop;
|
||||||
import std.array, std.ascii, std.container, std.conv, std.exception,
|
import std.array, std.ascii, std.container, std.conv, std.exception,
|
||||||
std.functional, std.math, std.metastrings, std.range, std.string,
|
std.functional, std.math, std.metastrings, std.range, std.string,
|
||||||
std.traits, std.typecons, std.typetuple, std.uni;
|
std.traits, std.typecons, std.typetuple, std.uni, std.utf;
|
||||||
|
|
||||||
version(unittest)
|
version(unittest)
|
||||||
{
|
{
|
||||||
|
@ -5226,15 +5226,19 @@ Returns the common prefix of two ranges. Example:
|
||||||
assert(commonPrefix("hello, world", "hello, there") == "hello, ");
|
assert(commonPrefix("hello, world", "hello, there") == "hello, ");
|
||||||
----
|
----
|
||||||
|
|
||||||
The type of the result is the same as $(D takeExactly(r1, n)), where
|
For strings, the result is a slice of $(D r1) which contains the characters that
|
||||||
$(D n) is the number of elements that both ranges start with.
|
both strings start with. For all other types, the type of the result is the
|
||||||
|
same as the result of $(D takeExactly(r1, n)), where $(D n) is the number of
|
||||||
|
elements that both ranges start with.
|
||||||
*/
|
*/
|
||||||
auto commonPrefix(alias pred = "a == b", R1, R2)(R1 r1, R2 r2)
|
auto commonPrefix(alias pred = "a == b", R1, R2)(R1 r1, R2 r2)
|
||||||
if (isForwardRange!R1 && isForwardRange!R2)
|
if (isForwardRange!R1 && isForwardRange!R2 &&
|
||||||
|
!isNarrowString!R1 && !isNarrowString!R2 &&
|
||||||
|
is(typeof(binaryFun!pred(r1.front, r2.front))))
|
||||||
{
|
{
|
||||||
static if (isSomeString!R1 && isSomeString!R2
|
static if (isRandomAccessRange!R1 && isRandomAccessRange!R2 &&
|
||||||
&& ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof
|
hasLength!R1 && hasLength!R2 &&
|
||||||
|| isRandomAccessRange!R1 && hasLength!R2)
|
hasSlicing!R1)
|
||||||
{
|
{
|
||||||
immutable limit = min(r1.length, r2.length);
|
immutable limit = min(r1.length, r2.length);
|
||||||
foreach (i; 0 .. limit)
|
foreach (i; 0 .. limit)
|
||||||
|
@ -5250,21 +5254,99 @@ if (isForwardRange!R1 && isForwardRange!R2)
|
||||||
{
|
{
|
||||||
auto result = r1.save;
|
auto result = r1.save;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (; !r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
|
for (;
|
||||||
|
!r1.empty && !r2.empty && binaryFun!pred(r1.front, r2.front);
|
||||||
++i, r1.popFront(), r2.popFront())
|
++i, r1.popFront(), r2.popFront())
|
||||||
{
|
{}
|
||||||
}
|
|
||||||
return takeExactly(result, i);
|
return takeExactly(result, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
Overload of $(D commonPrefix) for a pair of strings of which at least one is
narrow, using a caller-supplied predicate.

$(D r1) is walked one whole code point at a time with $(D decode), and each
decoded code point is handed to $(D pred) together with the current front of
$(D r2). The walk stops at the first pair that $(D pred) rejects, or as soon
as either string is exhausted.

Returns: The slice $(D r1[0 .. n]), where $(D n) is the code-unit offset just
past the last code point of $(D r1) that was accepted. Because $(D n) only
ever advances by whole decoded code points, the slice never ends in the middle
of one.
*/
auto commonPrefix(alias pred, R1, R2)(R1 r1, R2 r2)
if (isSomeString!R1 && isSomeString!R2 &&
    !(!isNarrowString!R1 && !isNarrowString!R2) &&
    is(typeof(binaryFun!pred(r1.front, r2.front))))
{
    immutable total = r1.length;
    size_t accepted = 0; // code units of r1 already confirmed as prefix
    size_t cursor = 0;   // moved by decode() past the code point just read

    while (accepted < total && !r2.empty)
    {
        immutable codePoint = decode(r1, cursor);
        if (!binaryFun!pred(codePoint, r2.front))
            break;

        // Only commit the new offset once the whole code point has matched.
        r2.popFront();
        accepted = cursor;
    }

    return r1[0 .. accepted];
}
|
||||||
|
|
||||||
|
/**
Overload of $(D commonPrefix) for a pair of strings of which at least one is
narrow, comparing with plain equality.

When both strings use code units of the same size, the code units are compared
directly, one code point of $(D r1) at a time (its length is obtained from
$(D std.utf.stride)). On the first differing code unit, the result is cut back
to the start of the code point being compared, so a partially matching code
point is never included. When the code-unit sizes differ, the work is forwarded
to the predicate overload with $(D "a == b").

Returns: A slice of $(D r1) holding the common prefix.

Throws: $(D UTFException) if the shorter of the two lengths is reached while
still inside a code point of $(D r1), i.e. one of the inputs ends in a
truncated sequence.
*/
auto commonPrefix(R1, R2)(R1 r1, R2 r2)
if (isSomeString!R1 && isSomeString!R2 && !(!isNarrowString!R1 && !isNarrowString!R2))
{
    static if (ElementEncodingType!R1.sizeof == ElementEncodingType!R2.sizeof)
    {
        // Equal code-unit sizes plus "at least one narrow string" (from the
        // constraint) leave only 1-byte or 2-byte code units here, so name
        // the encoding accordingly instead of always claiming UTF-8.
        static if (ElementEncodingType!R1.sizeof == 1)
            enum truncatedMsg = "Invalid UTF-8 sequence";
        else
            enum truncatedMsg = "Invalid UTF-16 sequence";

        immutable limit = min(r1.length, r2.length);
        for (size_t i = 0; i < limit;)
        {
            immutable codeLen = std.utf.stride(r1, i);
            size_t j = 0; // code units of the current code point matched so far

            for (; j < codeLen && i < limit; ++i, ++j)
            {
                // i - j is where the current code point began.
                if (r1[i] != r2[i])
                    return r1[0 .. i - j];
            }

            // Ran out of input before the code point was complete.
            if (i == limit && j < codeLen)
                throw new UTFException(truncatedMsg, i);
        }
        return r1[0 .. limit];
    }
    else
        return commonPrefix!"a == b"(r1, r2);
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
{
|
{
|
||||||
assert(commonPrefix("hello, world", "hello, there") == "hello, ");
|
assert(commonPrefix([1, 2, 3], [1, 2, 3, 4, 5]) == [1, 2, 3]);
|
||||||
assert(commonPrefix("hello, ", "hello, world") == "hello, ");
|
assert(commonPrefix([1, 2, 3, 4, 5], [1, 2, 3]) == [1, 2, 3]);
|
||||||
assert(equal(commonPrefix("hello, world", "hello, there"w), "hello, "));
|
assert(commonPrefix([1, 2, 3, 4], [1, 2, 3, 4]) == [1, 2, 3, 4]);
|
||||||
assert(equal(commonPrefix("hello, world"w, "hello, there"), "hello, "));
|
assert(commonPrefix([1, 2, 3], [7, 2, 3, 4, 5]).empty);
|
||||||
assert(equal(commonPrefix("hello, world", "hello, there"d), "hello, "));
|
assert(commonPrefix([7, 2, 3, 4, 5], [1, 2, 3]).empty);
|
||||||
|
assert(commonPrefix([1, 2, 3], cast(int[])null).empty);
|
||||||
|
assert(commonPrefix(cast(int[])null, [1, 2, 3]).empty);
|
||||||
|
assert(commonPrefix(cast(int[])null, cast(int[])null).empty);
|
||||||
|
|
||||||
|
foreach (S; TypeTuple!(char[], const(char)[], string,
|
||||||
|
wchar[], const(wchar)[], wstring,
|
||||||
|
dchar[], const(dchar)[], dstring))
|
||||||
|
{
|
||||||
|
foreach(T; TypeTuple!(string, wstring, dstring))
|
||||||
|
{
|
||||||
|
assert(commonPrefix(to!S(""), to!T("")).empty);
|
||||||
|
assert(commonPrefix(to!S(""), to!T("hello")).empty);
|
||||||
|
assert(commonPrefix(to!S("hello"), to!T("")).empty);
|
||||||
|
assert(commonPrefix(to!S("hello, world"), to!T("hello, there")) == to!S("hello, "));
|
||||||
|
assert(commonPrefix(to!S("hello, there"), to!T("hello, world")) == to!S("hello, "));
|
||||||
|
assert(commonPrefix(to!S("hello, "), to!T("hello, world")) == to!S("hello, "));
|
||||||
|
assert(commonPrefix(to!S("hello, world"), to!T("hello, ")) == to!S("hello, "));
|
||||||
|
assert(commonPrefix(to!S("hello, world"), to!T("hello, world")) == to!S("hello, world"));
|
||||||
|
|
||||||
|
//Bug# 8890
|
||||||
|
assert(commonPrefix(to!S("Пиво"), to!T("Пони"))== to!S("П"));
|
||||||
|
assert(commonPrefix(to!S("Пони"), to!T("Пиво"))== to!S("П"));
|
||||||
|
assert(commonPrefix(to!S("Пиво"), to!T("Пиво"))== to!S("Пиво"));
|
||||||
|
assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFE"),
|
||||||
|
to!T("\U0010FFFF\U0010FFFB\U0010FFFC")) == to!S("\U0010FFFF\U0010FFFB"));
|
||||||
|
assert(commonPrefix(to!S("\U0010FFFF\U0010FFFB\U0010FFFC"),
|
||||||
|
to!T("\U0010FFFF\U0010FFFB\U0010FFFE")) == to!S("\U0010FFFF\U0010FFFB"));
|
||||||
|
assert(commonPrefix!"a != b"(to!S("Пиво"), to!T("онво")) == to!S("Пи"));
|
||||||
|
assert(commonPrefix!"a != b"(to!S("онво"), to!T("Пиво")) == to!S("он"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertThrown!UTFException(commonPrefix("\U0010FFFF\U0010FFFB", "\U0010FFFF\U0010FFFB"[0 .. $ - 1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
// findAdjacent
|
// findAdjacent
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue