Make it so std.utf can be tested with no autodecoding.

This commit is contained in:
Steven Schveighoffer 2020-08-06 20:07:57 -04:00 committed by The Dlang Bot
parent 1f612dff31
commit 17ae8fc9be
5 changed files with 184 additions and 97 deletions

View file

@ -241,7 +241,7 @@ STD_MODULES=$(call P2MODULES,$(STD_PACKAGES))
# NoAutodecode test modules. # NoAutodecode test modules.
# List all modules whose unittests are known to work without autodecode enabled. # List all modules whose unittests are known to work without autodecode enabled.
NO_AUTODECODE_MODULES= NO_AUTODECODE_MODULES= std/utf
# Other D modules that aren't under std/ # Other D modules that aren't under std/
EXTRA_MODULES_COMMON := $(addprefix etc/c/,curl odbc/sql odbc/sqlext \ EXTRA_MODULES_COMMON := $(addprefix etc/c/,curl odbc/sql odbc/sqlext \

View file

@ -887,12 +887,23 @@ template equal(alias pred = "a == b")
enum hasFixedLength(T) = hasLength!T || isNarrowString!T; enum hasFixedLength(T) = hasLength!T || isNarrowString!T;
// Evaluates to `true` when the element encoding types of R1 and R2 are both
// character types but differ in size (i.e. they are different kinds of UTF
// code unit). Such ranges are compared by code point rather than by code
// unit. This is for backwards compatibility with autodecode strings.
enum useCodePoint(R1, R2) =
isSomeChar!(ElementEncodingType!R1) && isSomeChar!(ElementEncodingType!R2) &&
(ElementEncodingType!R1).sizeof != (ElementEncodingType!R2).sizeof;
/++ /++
Compares two ranges for equality. The ranges may have Compares two ranges for equality. The ranges may have
different element types, as long as `pred(r1.front, r2.front)` different element types, as long as `pred(r1.front, r2.front)`
evaluates to `bool`. evaluates to `bool`.
Performs $(BIGOH min(r1.length, r2.length)) evaluations of `pred`. Performs $(BIGOH min(r1.length, r2.length)) evaluations of `pred`.
If the two ranges are different kinds of UTF code unit (`char`, `wchar`, or
`dchar`), then the ranges are compared using UTF decoding to avoid
accidentally integer-promoting code units.
Params: Params:
r1 = The first range to be compared. r1 = The first range to be compared.
r2 = The second range to be compared. r2 = The second range to be compared.
@ -902,7 +913,8 @@ template equal(alias pred = "a == b")
for element, according to binary predicate `pred`. for element, according to binary predicate `pred`.
+/ +/
bool equal(Range1, Range2)(Range1 r1, Range2 r2) bool equal(Range1, Range2)(Range1 r1, Range2 r2)
if (isInputRange!Range1 && isInputRange!Range2 && if (!useCodePoint!(Range1, Range2) &&
isInputRange!Range1 && isInputRange!Range2 &&
is(typeof(binaryFun!pred(r1.front, r2.front)))) is(typeof(binaryFun!pred(r1.front, r2.front))))
{ {
static assert(!(isInfinite!Range1 && isInfinite!Range2), static assert(!(isInfinite!Range1 && isInfinite!Range2),
@ -928,7 +940,7 @@ template equal(alias pred = "a == b")
// can be avoided if they have the same ElementEncodingType // can be avoided if they have the same ElementEncodingType
else static if (is(typeof(pred) == string) && pred == "a == b" && else static if (is(typeof(pred) == string) && pred == "a == b" &&
isAutodecodableString!Range1 != isAutodecodableString!Range2 && isAutodecodableString!Range1 != isAutodecodableString!Range2 &&
is(ElementEncodingType!Range1 == ElementEncodingType!Range2)) is(immutable ElementEncodingType!Range1 == immutable ElementEncodingType!Range2))
{ {
import std.utf : byCodeUnit; import std.utf : byCodeUnit;
@ -968,6 +980,14 @@ template equal(alias pred = "a == b")
return r2.empty; return r2.empty;
} }
} }
/// ditto
// Overload selected when `useCodePoint!(Range1, Range2)` holds: both ranges
// are adapted with `byDchar` and the comparison is forwarded to `equal`
// on the adapted ranges.
bool equal(Range1, Range2)(Range1 r1, Range2 r2)
if (useCodePoint!(Range1, Range2))
{
import std.utf : byDchar;
// Compare via `byDchar` on both sides instead of comparing the
// differently-sized code units directly.
return equal(r1.byDchar, r2.byDchar);
}
} }
/// ///
@ -1073,20 +1093,28 @@ range of range (of range...) comparisons.
@safe @nogc pure unittest @safe @nogc pure unittest
{ {
import std.utf : byChar, byDchar; import std.utf : byChar, byDchar, byWchar;
assert(equal("æøå".byChar, "æøå")); assert(equal("æøå".byChar, "æøå"));
assert(equal("æøå".byChar, "æøå"w));
assert(equal("æøå".byChar, "æøå"d));
assert(equal("æøå", "æøå".byChar)); assert(equal("æøå", "æøå".byChar));
assert(equal("æøå".byDchar, "æøå"d)); assert(equal("æøå"w, "æøå".byChar));
assert(equal("æøå"d, "æøå".byDchar)); assert(equal("æøå"d, "æøå".byChar));
}
@safe pure unittest
{
import std.utf : byWchar;
assert(equal("æøå".byWchar, "æøå"));
assert(equal("æøå".byWchar, "æøå"w)); assert(equal("æøå".byWchar, "æøå"w));
assert(equal("æøå".byWchar, "æøå"d));
assert(equal("æøå", "æøå".byWchar));
assert(equal("æøå"w, "æøå".byWchar)); assert(equal("æøå"w, "æøå".byWchar));
assert(equal("æøå"d, "æøå".byWchar));
assert(equal("æøå".byDchar, "æøå"));
assert(equal("æøå".byDchar, "æøå"w));
assert(equal("æøå".byDchar, "æøå"d));
assert(equal("æøå", "æøå".byDchar));
assert(equal("æøå"w, "æøå".byDchar));
assert(equal("æøå"d, "æøå".byDchar));
} }
@safe @nogc pure unittest @safe @nogc pure unittest

View file

@ -92,7 +92,9 @@ public import std.range.primitives : save, empty, popFront, popBack, front, back
* Allocates an array and initializes it with copies of the elements * Allocates an array and initializes it with copies of the elements
* of range `r`. * of range `r`.
* *
* Narrow strings are handled as a special case in an overload. * Narrow strings are handled as follows:
* - If autodecoding is turned on (default), then they are handled by a separate overload.
* - If autodecoding is turned off, then this is equivalent to duplicating the array.
* *
* Params: * Params:
* r = range (or aggregate with `opApply` function) whose elements are copied into the allocated array * r = range (or aggregate with `opApply` function) whose elements are copied into the allocated array
@ -100,7 +102,7 @@ public import std.range.primitives : save, empty, popFront, popBack, front, back
* allocated and initialized array * allocated and initialized array
*/ */
ForeachType!Range[] array(Range)(Range r) ForeachType!Range[] array(Range)(Range r)
if (isIterable!Range && !isNarrowString!Range && !isInfinite!Range) if (isIterable!Range && !isAutodecodableString!Range && !isInfinite!Range)
{ {
if (__ctfe) if (__ctfe)
{ {
@ -145,7 +147,7 @@ if (isIterable!Range && !isNarrowString!Range && !isInfinite!Range)
/// ditto /// ditto
ForeachType!(PointerTarget!Range)[] array(Range)(Range r) ForeachType!(PointerTarget!Range)[] array(Range)(Range r)
if (isPointer!Range && isIterable!(PointerTarget!Range) && !isNarrowString!Range && !isInfinite!Range) if (isPointer!Range && isIterable!(PointerTarget!Range) && !isAutodecodableString!Range && !isInfinite!Range)
{ {
return array(*r); return array(*r);
} }
@ -252,8 +254,11 @@ version (StdUnittest)
} }
/** /**
Convert a narrow string to an array type that fully supports random access. Convert a narrow autodecoding string to an array type that fully supports
This is handled as a special case and always returns an array of `dchar` random access. This is handled as a special case and always returns an array
of `dchar`.
NOTE: This function is never used when autodecoding is turned off.
Params: Params:
str = `isNarrowString` to be converted to an array of `dchar` str = `isNarrowString` to be converted to an array of `dchar`
@ -262,7 +267,7 @@ Returns:
the input. the input.
*/ */
CopyTypeQualifiers!(ElementType!String,dchar)[] array(String)(scope String str) CopyTypeQualifiers!(ElementType!String,dchar)[] array(String)(scope String str)
if (isNarrowString!String) if (isAutodecodableString!String)
{ {
import std.utf : toUTF32; import std.utf : toUTF32;
auto temp = str.toUTF32; auto temp = str.toUTF32;
@ -276,10 +281,19 @@ if (isNarrowString!String)
@safe unittest @safe unittest
{ {
import std.range.primitives : isRandomAccessRange; import std.range.primitives : isRandomAccessRange;
import std.traits : isAutodecodableString;
assert("Hello D".array == "Hello D"d); // note that if autodecoding is turned off, `array` will not transcode these.
static if (isAutodecodableString!string)
assert("Hello D".array == "Hello D"d);
else
assert("Hello D".array == "Hello D");
static if (isAutodecodableString!wstring)
assert("Hello D"w.array == "Hello D"d);
else
assert("Hello D"w.array == "Hello D"w);
assert("Hello D"w.array == "Hello D"d);
static assert(isRandomAccessRange!dstring == true); static assert(isRandomAccessRange!dstring == true);
} }
@ -339,8 +353,11 @@ if (isNarrowString!String)
assert(e == f); assert(e == f);
assert(array(OpApply.init) == [0,1,2,3,4,5,6,7,8,9]); assert(array(OpApply.init) == [0,1,2,3,4,5,6,7,8,9]);
assert(array("ABC") == "ABC"d); static if (isAutodecodableString!string)
assert(array("ABC".dup) == "ABC"d.dup); {
assert(array("ABC") == "ABC"d);
assert(array("ABC".dup) == "ABC"d.dup);
}
} }
// https://issues.dlang.org/show_bug.cgi?id=8233 // https://issues.dlang.org/show_bug.cgi?id=8233
@ -597,11 +614,11 @@ if (isInputRange!Values && isInputRange!Keys)
auto r = "abcde".enumerate.filter!(a => a.index == 2); auto r = "abcde".enumerate.filter!(a => a.index == 2);
auto a = assocArray(r.map!(a => a.value), r.map!(a => a.index)); auto a = assocArray(r.map!(a => a.value), r.map!(a => a.index));
assert(is(typeof(a) == size_t[dchar]));
static if (autodecodeStrings) static if (autodecodeStrings)
alias achar = dchar; alias achar = dchar;
else else
alias achar = immutable(char); alias achar = immutable(char);
static assert(is(typeof(a) == size_t[achar]));
assert(a == [achar('c'): size_t(2)]); assert(a == [achar('c'): size_t(2)]);
} }
@ -1265,7 +1282,7 @@ if (isSomeString!(T[]) && allSatisfy!(isCharOrStringOrDcharRange, U))
static if (is(Unqual!T == T) static if (is(Unqual!T == T)
&& allSatisfy!(isInputRangeWithLengthOrConvertible!dchar, U)) && allSatisfy!(isInputRangeWithLengthOrConvertible!dchar, U))
{ {
import std.utf : codeLength; import std.utf : codeLength, byDchar;
// mutable, can do in place // mutable, can do in place
//helper function: re-encode dchar to Ts and store at *ptr //helper function: re-encode dchar to Ts and store at *ptr
static T* putDChar(T* ptr, dchar ch) static T* putDChar(T* ptr, dchar ch)
@ -1330,7 +1347,7 @@ if (isSomeString!(T[]) && allSatisfy!(isCharOrStringOrDcharRange, U))
} }
else else
{ {
foreach (dchar ch; stuff[i]) foreach (ch; stuff[i].byDchar)
ptr = putDChar(ptr, ch); ptr = putDChar(ptr, ch);
} }
} }
@ -1942,7 +1959,9 @@ ElementEncodingType!(ElementType!RoR)[] join(RoR, R)(RoR ror, scope R sep)
if (isInputRange!RoR && if (isInputRange!RoR &&
isInputRange!(Unqual!(ElementType!RoR)) && isInputRange!(Unqual!(ElementType!RoR)) &&
isInputRange!R && isInputRange!R &&
is(immutable ElementType!(ElementType!RoR) == immutable ElementType!R)) (is(immutable ElementType!(ElementType!RoR) == immutable ElementType!R) ||
(isSomeChar!(ElementType!(ElementType!RoR)) && isSomeChar!(ElementType!R))
))
{ {
alias RetType = typeof(return); alias RetType = typeof(return);
alias RetTypeElement = Unqual!(ElementEncodingType!RetType); alias RetTypeElement = Unqual!(ElementEncodingType!RetType);
@ -2019,7 +2038,9 @@ if (isInputRange!RoR &&
ElementEncodingType!(ElementType!RoR)[] join(RoR, E)(RoR ror, scope E sep) ElementEncodingType!(ElementType!RoR)[] join(RoR, E)(RoR ror, scope E sep)
if (isInputRange!RoR && if (isInputRange!RoR &&
isInputRange!(Unqual!(ElementType!RoR)) && isInputRange!(Unqual!(ElementType!RoR)) &&
is(E : ElementType!(ElementType!RoR))) ((is(E : ElementType!(ElementType!RoR))) ||
(!autodecodeStrings && isSomeChar!(ElementType!(ElementType!RoR)) &&
isSomeChar!E)))
{ {
alias RetType = typeof(return); alias RetType = typeof(return);
alias RetTypeElement = Unqual!(ElementEncodingType!RetType); alias RetTypeElement = Unqual!(ElementEncodingType!RetType);
@ -2175,21 +2196,12 @@ if (isInputRange!RoR &&
auto arr2 = "Здравствуй Мир Unicode".to!(T); auto arr2 = "Здравствуй Мир Unicode".to!(T);
auto arr = ["Здравствуй", "Мир", "Unicode"].to!(T[]); auto arr = ["Здравствуй", "Мир", "Unicode"].to!(T[]);
assert(join(arr) == "ЗдравствуйМирUnicode"); assert(join(arr) == "ЗдравствуйМирUnicode");
static if (autodecodeStrings) static foreach (S; AliasSeq!(char,wchar,dchar))
{ {{
static foreach (S; AliasSeq!(char,wchar,dchar)) auto jarr = arr.join(to!S(' '));
{{ static assert(is(typeof(jarr) == T));
auto jarr = arr.join(to!S(' ')); assert(jarr == arr2);
static assert(is(typeof(jarr) == T)); }}
assert(jarr == arr2);
}}
}
else
{
// Turning off autodecode means the join() won't
// just convert arr[] to dchar, so mixing char
// types fails to compile.
}
static foreach (S; AliasSeq!(string,wstring,dstring)) static foreach (S; AliasSeq!(string,wstring,dstring))
{{ {{
auto jarr = arr.join(to!S(" ")); auto jarr = arr.join(to!S(" "));

View file

@ -2406,10 +2406,19 @@ if (isAutodecodableString!(T[]) && !isAggregateType!(T[]))
} }
/** /**
Autodecoding is enabled if this is set to true. EXPERIMENTAL: to try out removing autodecoding, set the version
`NoAutodecodeStrings`. Most things are expected to fail with this version
currently.
*/ */
version (NoAutodecodeStrings)
enum autodecodeStrings = true; {
enum autodecodeStrings = false;
}
else
{
///
enum autodecodeStrings = true;
}
/** /**
Implements the range interface primitive `front` for built-in Implements the range interface primitive `front` for built-in

144
std/utf.d
View file

@ -66,7 +66,7 @@ import core.exception : UnicodeException;
import std.meta : AliasSeq; import std.meta : AliasSeq;
import std.range.primitives; import std.range.primitives;
import std.traits : isAutodecodableString, isPointer, isSomeChar, import std.traits : isAutodecodableString, isPointer, isSomeChar,
isSomeString, isStaticArray, Unqual; isSomeString, isStaticArray, Unqual, isConvertibleToString;
import std.typecons : Flag, Yes, No; import std.typecons : Flag, Yes, No;
@ -463,7 +463,8 @@ if (is(S : const wchar[]))
/// Ditto /// Ditto
uint stride(S)(auto ref S str) uint stride(S)(auto ref S str)
if (isInputRange!S && is(immutable ElementType!S == immutable wchar)) if (isInputRange!S && is(immutable ElementType!S == immutable wchar) &&
!is(S : const wchar[]))
{ {
assert(!str.empty, "UTF-16 sequence is empty"); assert(!str.empty, "UTF-16 sequence is empty");
immutable uint u = str.front; immutable uint u = str.front;
@ -1873,11 +1874,12 @@ version (StdUnittest) private void testDecode(R)(R range,
import core.exception : AssertError; import core.exception : AssertError;
import std.exception : enforce; import std.exception : enforce;
import std.string : format; import std.string : format;
import std.traits : isNarrowString;
static if (hasLength!R) static if (hasLength!R)
immutable lenBefore = range.length; immutable lenBefore = range.length;
static if (isRandomAccessRange!R) static if (isRandomAccessRange!R && !isNarrowString!R)
{ {
{ {
immutable result = decode(range, index); immutable result = decode(range, index);
@ -2105,11 +2107,10 @@ version (StdUnittest) private void testBadDecodeBack(R)(R range, size_t line = _
@system unittest @system unittest
{ {
import std.conv : to;
import std.exception; import std.exception;
assertCTFEable!( assertCTFEable!(
{ {
foreach (S; AliasSeq!(to!wstring, InputCU!wchar, RandomCU!wchar, foreach (S; AliasSeq!((wstring s) => s, InputCU!wchar, RandomCU!wchar,
(wstring s) => new RefBidirCU!wchar(s), (wstring s) => new RefBidirCU!wchar(s),
(wstring s) => new RefRandomCU!wchar(s))) (wstring s) => new RefRandomCU!wchar(s)))
{ {
@ -2144,7 +2145,7 @@ version (StdUnittest) private void testBadDecodeBack(R)(R range, size_t line = _
} }
} }
foreach (S; AliasSeq!(to!wstring, RandomCU!wchar, (wstring s) => new RefRandomCU!wchar(s))) foreach (S; AliasSeq!((wchar[] s) => s.idup, RandomCU!wchar, (wstring s) => new RefRandomCU!wchar(s)))
{ {
auto str = S([cast(wchar) 0xD800, cast(wchar) 0xDC00, auto str = S([cast(wchar) 0xD800, cast(wchar) 0xDC00,
cast(wchar) 0x1400, cast(wchar) 0x1400,
@ -2161,11 +2162,10 @@ version (StdUnittest) private void testBadDecodeBack(R)(R range, size_t line = _
@system unittest @system unittest
{ {
import std.conv : to;
import std.exception; import std.exception;
assertCTFEable!( assertCTFEable!(
{ {
foreach (S; AliasSeq!(to!dstring, RandomCU!dchar, InputCU!dchar, foreach (S; AliasSeq!((dstring s) => s, RandomCU!dchar, InputCU!dchar,
(dstring s) => new RefBidirCU!dchar(s), (dstring s) => new RefBidirCU!dchar(s),
(dstring s) => new RefRandomCU!dchar(s))) (dstring s) => new RefRandomCU!dchar(s)))
{ {
@ -2202,7 +2202,7 @@ version (StdUnittest) private void testBadDecodeBack(R)(R range, size_t line = _
} }
} }
foreach (S; AliasSeq!(to!dstring, RandomCU!dchar, (dstring s) => new RefRandomCU!dchar(s))) foreach (S; AliasSeq!((dchar[] s) => s.idup, RandomCU!dchar, (dstring s) => new RefRandomCU!dchar(s)))
{ {
auto str = S([cast(dchar) 0x10000, cast(dchar) 0x1400, cast(dchar) 0xB9DDE]); auto str = S([cast(dchar) 0x10000, cast(dchar) 0x1400, cast(dchar) 0xB9DDE]);
testDecode(str, 0, 0x10000, 1); testDecode(str, 0, 0x10000, 1);
@ -2398,7 +2398,8 @@ size_t encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)(
assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); assertThrown!UTFException(encode(buf, cast(dchar) 0x110000));
assert(encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000) == buf.stride); assert(encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000) == buf.stride);
assert(buf.front == replacementDchar); enum replacementDcharString = "\uFFFD";
assert(buf[0 .. replacementDcharString.length] == replacementDcharString);
}); });
} }
@ -2616,9 +2617,11 @@ void encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)(
assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF));
assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); assertThrown!UTFException(encode(buf, cast(dchar) 0x110000));
assert(buf.back != replacementDchar); enum replacementDcharString = "\uFFFD";
enum rdcslen = replacementDcharString.length;
assert(buf[$ - rdcslen .. $] != replacementDcharString);
encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000);
assert(buf.back == replacementDchar); assert(buf[$ - rdcslen .. $] == replacementDcharString);
}); });
} }
@ -2774,7 +2777,7 @@ if (isSomeChar!C)
The number of code units in `input` when encoded to `C` The number of code units in `input` when encoded to `C`
+/ +/
size_t codeLength(C, InputRange)(InputRange input) size_t codeLength(C, InputRange)(InputRange input)
if (isInputRange!InputRange && !isInfinite!InputRange && is(ElementType!InputRange : dchar)) if (isInputRange!InputRange && !isInfinite!InputRange && isSomeChar!(ElementType!InputRange))
{ {
alias EncType = Unqual!(ElementEncodingType!InputRange); alias EncType = Unqual!(ElementEncodingType!InputRange);
static if (isSomeString!InputRange && is(EncType == C) && is(typeof(input.length))) static if (isSomeString!InputRange && is(EncType == C) && is(typeof(input.length)))
@ -2783,7 +2786,7 @@ if (isInputRange!InputRange && !isInfinite!InputRange && is(ElementType!InputRan
{ {
size_t total = 0; size_t total = 0;
foreach (dchar c; input) foreach (c; input.byDchar)
total += codeLength!C(c); total += codeLength!C(c);
return total; return total;
@ -2793,20 +2796,19 @@ if (isInputRange!InputRange && !isInfinite!InputRange && is(ElementType!InputRan
/// ///
@safe unittest @safe unittest
{ {
import std.conv : to;
assert(codeLength!char("hello world") == assert(codeLength!char("hello world") ==
to!string("hello world").length); "hello world".length);
assert(codeLength!wchar("hello world") == assert(codeLength!wchar("hello world") ==
to!wstring("hello world").length); "hello world"w.length);
assert(codeLength!dchar("hello world") == assert(codeLength!dchar("hello world") ==
to!dstring("hello world").length); "hello world"d.length);
assert(codeLength!char(`プログラミング`) == assert(codeLength!char(`プログラミング`) ==
to!string(`プログラミング`).length); `プログラミング`.length);
assert(codeLength!wchar(`プログラミング`) == assert(codeLength!wchar(`プログラミング`) ==
to!wstring(`プログラミング`).length); `プログラミング`w.length);
assert(codeLength!dchar(`プログラミング`) == assert(codeLength!dchar(`プログラミング`) ==
to!dstring(`プログラミング`).length); `プログラミング`d.length);
string haystack = `Être sans la verité, ça, ce ne serait pas bien.`; string haystack = `Être sans la verité, ça, ce ne serait pas bien.`;
wstring needle = `Être sans la verité`; wstring needle = `Être sans la verité`;
@ -2949,8 +2951,9 @@ if (isInputRange!S && !isInfinite!S && isSomeChar!(ElementEncodingType!S))
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.internal.test.dummyrange : ReferenceInputRange; import std.internal.test.dummyrange : ReferenceInputRange;
auto r1 = new ReferenceInputRange!dchar("Hellø"); alias RT = ReferenceInputRange!(ElementType!(string));
auto r2 = new ReferenceInputRange!dchar("𐐷"); auto r1 = new RT("Hellø");
auto r2 = new RT("𐐷");
assert(r1.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8])); assert(r1.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8]));
assert(r2.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7])); assert(r2.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
@ -2991,8 +2994,9 @@ if (isInputRange!S && !isInfinite!S && isSomeChar!(ElementEncodingType!S))
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.internal.test.dummyrange : ReferenceInputRange; import std.internal.test.dummyrange : ReferenceInputRange;
auto r1 = new ReferenceInputRange!dchar("𤭢"); alias RT = ReferenceInputRange!(ElementType!(string));
auto r2 = new ReferenceInputRange!dchar("𐐷"); auto r1 = new RT("𤭢");
auto r2 = new RT("𐐷");
assert(r1.toUTF16.equal([0xD852, 0xDF62])); assert(r1.toUTF16.equal([0xD852, 0xDF62]));
assert(r2.toUTF16.equal([0xD801, 0xDC37])); assert(r2.toUTF16.equal([0xD801, 0xDC37]));
@ -3369,10 +3373,10 @@ if (isSomeChar!C)
Throws: Throws:
`UTFException` if `str` is not well-formed. `UTFException` if `str` is not well-formed.
+/ +/
size_t count(C)(const(C)[] str) @trusted pure nothrow @nogc size_t count(C)(const(C)[] str) @safe pure nothrow @nogc
if (isSomeChar!C) if (isSomeChar!C)
{ {
return walkLength(str); return walkLength(str.byDchar);
} }
/// ///
@ -3553,13 +3557,11 @@ enum dchar replacementDchar = '\uFFFD';
* $(REF byGrapheme, std,uni). * $(REF byGrapheme, std,uni).
*/ */
auto byCodeUnit(R)(R r) auto byCodeUnit(R)(R r)
if (isAutodecodableString!R || if ((isConvertibleToString!R && !isStaticArray!R) ||
isInputRange!R && isSomeChar!(ElementEncodingType!R) || (isInputRange!R && isSomeChar!(ElementEncodingType!R)))
(is(R : const dchar[]) && !isStaticArray!R))
{ {
import std.traits : isNarrowString, StringTypeOf; import std.traits : StringTypeOf;
static if (isNarrowString!R || static if (// This would be cleaner if we had a way to check whether a type
// This would be cleaner if we had a way to check whether a type
// was a range without any implicit conversions. // was a range without any implicit conversions.
(isAutodecodableString!R && !__traits(hasMember, R, "empty") && (isAutodecodableString!R && !__traits(hasMember, R, "empty") &&
!__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront"))) !__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront")))
@ -3590,8 +3592,9 @@ if (isAutodecodableString!R ||
return ByCodeUnitImpl(r); return ByCodeUnitImpl(r);
} }
else static if (is(R : const dchar[]) && !__traits(hasMember, R, "empty") && else static if (!isInputRange!R ||
!__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront")) (is(R : const dchar[]) && !__traits(hasMember, R, "empty") &&
!__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront")))
{ {
return cast(StringTypeOf!R) r; return cast(StringTypeOf!R) r;
} }
@ -3606,6 +3609,7 @@ if (isAutodecodableString!R ||
@safe unittest @safe unittest
{ {
import std.range.primitives; import std.range.primitives;
import std.traits : isAutodecodableString;
auto r = "Hello, World!".byCodeUnit(); auto r = "Hello, World!".byCodeUnit();
static assert(hasLength!(typeof(r))); static assert(hasLength!(typeof(r)));
@ -3613,14 +3617,27 @@ if (isAutodecodableString!R ||
static assert(isRandomAccessRange!(typeof(r))); static assert(isRandomAccessRange!(typeof(r)));
static assert(is(ElementType!(typeof(r)) == immutable char)); static assert(is(ElementType!(typeof(r)) == immutable char));
// contrast with the range capabilities of standard strings // contrast with the range capabilities of standard strings (with or
// without autodecoding enabled).
auto s = "Hello, World!"; auto s = "Hello, World!";
static assert(isBidirectionalRange!(typeof(r))); static assert(isBidirectionalRange!(typeof(r)));
static assert(is(ElementType!(typeof(s)) == dchar)); static if (isAutodecodableString!(typeof(s)))
{
static assert(!isRandomAccessRange!(typeof(s))); // with autodecoding enabled, strings are non-random-access ranges of
static assert(!hasSlicing!(typeof(s))); // dchar.
static assert(!hasLength!(typeof(s))); static assert(is(ElementType!(typeof(s)) == dchar));
static assert(!isRandomAccessRange!(typeof(s)));
static assert(!hasSlicing!(typeof(s)));
static assert(!hasLength!(typeof(s)));
}
else
{
// without autodecoding, strings are normal arrays.
static assert(is(ElementType!(typeof(s)) == immutable char));
static assert(isRandomAccessRange!(typeof(s)));
static assert(hasSlicing!(typeof(s)));
static assert(hasLength!(typeof(s)));
}
} }
/// `byCodeUnit` does no Unicode decoding /// `byCodeUnit` does no Unicode decoding
@ -3641,12 +3658,16 @@ if (isAutodecodableString!R ||
{ {
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.range : popFrontN; import std.range : popFrontN;
import std.traits : isAutodecodableString;
{ {
auto range = byCodeUnit("hello world"); auto range = byCodeUnit("hello world");
range.popFrontN(3); range.popFrontN(3);
assert(equal(range.save, "lo world")); assert(equal(range.save, "lo world"));
string str = range.source; static if (isAutodecodableString!string) // only enabled with autodecoding
assert(str == "lo world"); {
string str = range.source;
assert(str == "lo world");
}
} }
// source only exists if the range was wrapped // source only exists if the range was wrapped
{ {
@ -3705,7 +3726,7 @@ if (isAutodecodableString!R ||
{ {
auto bcu = "hello".byCodeUnit().byCodeUnit(); auto bcu = "hello".byCodeUnit().byCodeUnit();
static assert(isForwardRange!(typeof(bcu))); static assert(isForwardRange!(typeof(bcu)));
static assert(is(typeof(bcu) == struct)); static assert(is(typeof(bcu) == struct) == isAutodecodableString!string);
auto s = bcu.save; auto s = bcu.save;
bcu.popFront(); bcu.popFront();
assert(s.front == 'h'); assert(s.front == 'h');
@ -3714,7 +3735,7 @@ if (isAutodecodableString!R ||
auto bcu = "hello".byCodeUnit(); auto bcu = "hello".byCodeUnit();
static assert(hasSlicing!(typeof(bcu))); static assert(hasSlicing!(typeof(bcu)));
static assert(isBidirectionalRange!(typeof(bcu))); static assert(isBidirectionalRange!(typeof(bcu)));
static assert(is(typeof(bcu) == struct)); static assert(is(typeof(bcu) == struct) == isAutodecodableString!string);
static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit()))); static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
auto ret = bcu.retro; auto ret = bcu.retro;
assert(ret.front == 'o'); assert(ret.front == 'o');
@ -3725,7 +3746,7 @@ if (isAutodecodableString!R ||
auto bcu = "κόσμε"w.byCodeUnit(); auto bcu = "κόσμε"w.byCodeUnit();
static assert(hasSlicing!(typeof(bcu))); static assert(hasSlicing!(typeof(bcu)));
static assert(isBidirectionalRange!(typeof(bcu))); static assert(isBidirectionalRange!(typeof(bcu)));
static assert(is(typeof(bcu) == struct)); static assert(is(typeof(bcu) == struct) == isAutodecodableString!wstring);
static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit()))); static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
auto ret = bcu.retro; auto ret = bcu.retro;
assert(ret.front == 'ε'); assert(ret.front == 'ε');
@ -3742,7 +3763,7 @@ if (isAutodecodableString!R ||
auto orig = Stringish("\U0010fff8 𐁊 foo 𐂓"); auto orig = Stringish("\U0010fff8 𐁊 foo 𐂓");
auto bcu = orig.byCodeUnit(); auto bcu = orig.byCodeUnit();
static assert(is(typeof(bcu) == struct)); static assert(is(typeof(bcu) == struct));
static assert(!is(typeof(bcu) == Stringish)); static assert(!is(typeof(bcu) == Stringish) == isAutodecodableString!Stringish);
static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit()))); static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
static assert(is(ElementType!(typeof(bcu)) == immutable char)); static assert(is(ElementType!(typeof(bcu)) == immutable char));
assert(bcu.front == cast(char) 244); assert(bcu.front == cast(char) 244);
@ -3757,7 +3778,7 @@ if (isAutodecodableString!R ||
auto orig = WStringish("\U0010fff8 𐁊 foo 𐂓"w); auto orig = WStringish("\U0010fff8 𐁊 foo 𐂓"w);
auto bcu = orig.byCodeUnit(); auto bcu = orig.byCodeUnit();
static assert(is(typeof(bcu) == struct)); static assert(is(typeof(bcu) == struct));
static assert(!is(typeof(bcu) == WStringish)); static assert(!is(typeof(bcu) == WStringish) == isAutodecodableString!WStringish);
static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit()))); static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
static assert(is(ElementType!(typeof(bcu)) == immutable wchar)); static assert(is(ElementType!(typeof(bcu)) == immutable wchar));
assert(bcu.front == cast(wchar) 56319); assert(bcu.front == cast(wchar) 56319);
@ -3786,7 +3807,10 @@ if (isAutodecodableString!R ||
auto orig = FuncStringish("\U0010fff8 𐁊 foo 𐂓"); auto orig = FuncStringish("\U0010fff8 𐁊 foo 𐂓");
auto bcu = orig.byCodeUnit(); auto bcu = orig.byCodeUnit();
static assert(is(typeof(bcu) == struct)); static if (isAutodecodableString!FuncStringish)
static assert(is(typeof(bcu) == struct));
else
static assert(is(typeof(bcu) == string));
static assert(!is(typeof(bcu) == FuncStringish)); static assert(!is(typeof(bcu) == FuncStringish));
static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit()))); static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
static assert(is(ElementType!(typeof(bcu)) == immutable char)); static assert(is(ElementType!(typeof(bcu)) == immutable char));
@ -3903,7 +3927,10 @@ if (isAutodecodableString!R ||
auto orig = Enum.a; auto orig = Enum.a;
auto bcu = orig.byCodeUnit(); auto bcu = orig.byCodeUnit();
static assert(!is(typeof(bcu) == Enum)); static assert(!is(typeof(bcu) == Enum));
static assert(is(typeof(bcu) == struct)); static if (isAutodecodableString!Enum)
static assert(is(typeof(bcu) == struct));
else
static assert(is(typeof(bcu) == string));
static assert(is(ElementType!(typeof(bcu)) == immutable char)); static assert(is(ElementType!(typeof(bcu)) == immutable char));
assert(bcu.front == 't'); assert(bcu.front == 't');
} }
@ -3913,7 +3940,10 @@ if (isAutodecodableString!R ||
auto orig = WEnum.a; auto orig = WEnum.a;
auto bcu = orig.byCodeUnit(); auto bcu = orig.byCodeUnit();
static assert(!is(typeof(bcu) == WEnum)); static assert(!is(typeof(bcu) == WEnum));
static assert(is(typeof(bcu) == struct)); static if (isAutodecodableString!WEnum)
static assert(is(typeof(bcu) == struct));
else
static assert(is(typeof(bcu) == wstring));
static assert(is(ElementType!(typeof(bcu)) == immutable wchar)); static assert(is(ElementType!(typeof(bcu)) == immutable wchar));
assert(bcu.front == 't'); assert(bcu.front == 't');
} }
@ -3927,8 +3957,16 @@ if (isAutodecodableString!R ||
assert(bcu.front == 't'); assert(bcu.front == 't');
} }
static assert(!is(typeof(byCodeUnit("hello")) == string)); static if (autodecodeStrings)
static assert(!is(typeof(byCodeUnit("hello"w)) == wstring)); {
static assert(!is(typeof(byCodeUnit("hello")) == string));
static assert(!is(typeof(byCodeUnit("hello"w)) == wstring));
}
else
{
static assert(is(typeof(byCodeUnit("hello")) == string));
static assert(is(typeof(byCodeUnit("hello"w)) == wstring));
}
static assert(is(typeof(byCodeUnit("hello"d)) == dstring)); static assert(is(typeof(byCodeUnit("hello"d)) == dstring));
static assert(!__traits(compiles, byCodeUnit((char[5]).init))); static assert(!__traits(compiles, byCodeUnit((char[5]).init)));