Added a new popGrapheme function to std.uni (#9053)

* Added a new popGrapheme function to std.uni

* A changelog clarification and fix of inaccurate static condition

* Committing to restart the tests, and adding the new function to the
module doc index while there.
This commit is contained in:
Ate Eskola 2024-10-13 02:24:24 +03:00 committed by GitHub
parent eab6595ade
commit bdedad3bf8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 135 additions and 17 deletions

View file

@ -16,6 +16,7 @@ $(TR $(TD Decode) $(TD
$(LREF byGrapheme)
$(LREF decodeGrapheme)
$(LREF graphemeStride)
$(LREF popGrapheme)
))
$(TR $(TD Comparison) $(TD
$(LREF icmp)
@ -708,8 +709,8 @@ import std.meta : AliasSeq;
import std.range.primitives : back, ElementEncodingType, ElementType, empty,
front, hasLength, hasSlicing, isForwardRange, isInputRange,
isRandomAccessRange, popFront, put, save;
import std.traits : isConvertibleToString, isIntegral, isSomeChar,
isSomeString, Unqual, isDynamicArray;
import std.traits : isAutodecodableString, isConvertibleToString, isIntegral,
isSomeChar, isSomeString, Unqual, isDynamicArray;
// debug = std_uni;
import std.internal.unicode_tables; // generated file
@ -7148,17 +7149,25 @@ private immutable TransformRes
TransformRes.goOn
];
template genericDecodeGrapheme(bool getValue)
{
static if (getValue)
enum GraphemeRet { none, step, value }
template genericDecodeGrapheme(GraphemeRet retType)
{ alias Ret = GraphemeRet;
static if (retType == Ret.value)
alias Value = Grapheme;
else
else static if (retType == Ret.step)
alias Value = size_t;
else static if (retType == Ret.none)
alias Value = void;
Value genericDecodeGrapheme(Input)(ref Input range)
{
static if (getValue)
Grapheme grapheme;
static if (retType == Ret.value)
Grapheme result;
else static if (retType == Ret.step)
size_t result = 0;
auto state = GraphemeState.Start;
dchar ch;
@ -7173,8 +7182,10 @@ template genericDecodeGrapheme(bool getValue)
with(TransformRes)
{
case goOn:
static if (getValue)
grapheme ~= ch;
static if (retType == Ret.value)
result ~= ch;
else static if (retType == Ret.step)
result++;
range.popFront();
continue;
@ -7182,8 +7193,10 @@ template genericDecodeGrapheme(bool getValue)
goto rerun;
case retInclude:
static if (getValue)
grapheme ~= ch;
static if (retType == Ret.value)
result ~= ch;
else static if (retType == Ret.step)
result++;
range.popFront();
break outer;
@ -7192,8 +7205,8 @@ template genericDecodeGrapheme(bool getValue)
}
}
static if (getValue)
return grapheme;
static if (retType != Ret.none)
return result;
}
}
@ -7217,7 +7230,7 @@ if (is(C : dchar))
{
auto src = input[index..$];
auto n = src.length;
genericDecodeGrapheme!(false)(src);
genericDecodeGrapheme!(GraphemeRet.none)(src);
return n - src.length;
}
@ -7279,7 +7292,7 @@ if (is(C : dchar))
Grapheme decodeGrapheme(Input)(ref Input inp)
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
{
return genericDecodeGrapheme!true(inp);
return genericDecodeGrapheme!(GraphemeRet.value)(inp);
}
@safe unittest
@ -7304,6 +7317,73 @@ if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
assert(equal(decodeGrapheme(s)[], "\U0001F1EC\U0001F1E7"));
}
/++
    Consumes one full grapheme cluster from the front of `inp`, an
    $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`,
    and discards it instead of returning it.

    Params:
        inp = the range to advance; it is modified in place

    Returns:
        The number of code units that were read. This differs from the
        number of code points read only if `inp` is an autodecodable string.

    Note:
    This function modifies `inp` and thus `inp`
    must be an L-value.
+/
size_t popGrapheme(Input)(ref Input inp)
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
{
    static if (isAutodecodableString!Input || hasLength!Input)
    {
        // The range can report its own length, so the decoder does not need
        // to keep a running count: measure before and after, then subtract.
        const lengthBefore = inp.length;
        genericDecodeGrapheme!(GraphemeRet.none)(inp);
        return lengthBefore - inp.length;
    }
    else
    {
        // No length available: have the decoder count each step it takes.
        return genericDecodeGrapheme!(GraphemeRet.step)(inp);
    }
}
///
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : filter;

    // Each string holds two Union Jack flags; a single flag is one
    // grapheme made up of two code points.
    string utf8 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
    wstring utf16 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
    dstring utf32 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

    // For strings the result is counted in code units, not code points,
    // and exactly one flag is left behind after the pop.
    assert(utf8.popGrapheme() == 8);
    assert(utf8 == "\U0001F1EC\U0001F1E7");

    assert(utf16.popGrapheme() == 4);
    assert(utf16 == "\U0001F1EC\U0001F1E7");

    assert(utf32.popGrapheme() == 2);
    assert(utf32 == "\U0001F1EC\U0001F1E7");

    // Non-random-access ranges are supported too, provided the
    // character type is 32-bit.
    auto nonLetters = "\r\nhello!"d.filter!(x => !x.isAlpha);

    // A Windows-style line ending is a single grapheme of two code points.
    assert(nonLetters.popGrapheme() == 2);
    assert(nonLetters.equal("!"d));
}
// Attribute compliance test: popGrapheme should be usable from
// `nothrow @nogc` code when no autodecoding is needed.
@safe pure nothrow @nogc unittest
{
    import std.algorithm.iteration : filter;

    // A range without random access.
    auto oddCodePoints = "abcdef"d.filter!(x => x % 2);
    assert(oddCodePoints.popGrapheme() == 1);

    // A plain UTF-32 string.
    dstring plain = "abcdef"d;
    assert(plain.popGrapheme() == 1);
}
/++
$(P Iterate a string by $(LREF Grapheme).)
@ -7722,7 +7802,7 @@ public:
@property bool valid()() /*const*/
{
auto r = this[];
genericDecodeGrapheme!false(r);
genericDecodeGrapheme!(GraphemeRet.none)(r);
return r.length == 0;
}