mirror of
https://github.com/dlang/phobos.git
synced 2025-05-01 15:40:36 +03:00
Fix issue 19238 - Allow splitter on random-access ranges of characters that aren't
character arrays.
This commit is contained in:
parent
e211db679a
commit
f7154ec86a
1 changed file with 127 additions and 17 deletions
|
@ -5001,53 +5001,113 @@ private struct SplitterResult(alias isTerminator, Range)
|
||||||
}
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
Lazily splits the string `s` into words, using whitespace as the delimiter.
|
Lazily splits the character-based range `s` into words, using whitespace as the
|
||||||
|
delimiter.
|
||||||
|
|
||||||
This function is string specific and, contrary to
|
This function is character-range specific and, contrary to
|
||||||
`splitter!(std.uni.isWhite)`, runs of whitespace will be merged together
|
`splitter!(std.uni.isWhite)`, runs of whitespace will be merged together
|
||||||
(no empty tokens will be produced).
|
(no empty tokens will be produced).
|
||||||
|
|
||||||
Params:
|
Params:
|
||||||
s = The string to be split.
|
s = The character-based range to be split. Must be a string, or a
|
||||||
|
random-access range of character types.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of slices of
|
An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of slices of
|
||||||
the original string split by whitespace.
|
the original range split by whitespace.
|
||||||
+/
|
+/
|
||||||
auto splitter(C)(C[] s)
|
auto splitter(Range)(Range s)
|
||||||
if (isSomeChar!C)
|
if (isSomeString!Range ||
|
||||||
|
isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
|
||||||
|
!isConvertibleToString!Range &&
|
||||||
|
isSomeChar!(ElementEncodingType!Range))
|
||||||
{
|
{
|
||||||
import std.algorithm.searching : find;
|
import std.algorithm.searching : find;
|
||||||
static struct Result
|
static struct Result
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
import core.exception : RangeError;
|
import core.exception : RangeError;
|
||||||
C[] _s;
|
Range _s;
|
||||||
size_t _frontLength;
|
size_t _frontLength;
|
||||||
|
|
||||||
void getFirst() pure @safe
|
void getFirst()
|
||||||
{
|
{
|
||||||
import std.uni : isWhite;
|
import std.uni : isWhite;
|
||||||
|
import std.traits : Unqual;
|
||||||
|
|
||||||
auto r = find!(isWhite)(_s);
|
static if (is(Unqual!(ElementEncodingType!Range) == wchar) &&
|
||||||
|
is(Unqual!(ElementType!Range) == dchar))
|
||||||
|
{
|
||||||
|
// All Unicode whitespace characters fit into a single wchar, and
|
||||||
|
// this range is a wchar array, so we can scan it code unit by
|
||||||
|
// code unit instead of decoding each code point.
|
||||||
|
_frontLength = _s.length; // default condition, no spaces
|
||||||
|
foreach (i; 0 .. _s.length)
|
||||||
|
if (isWhite(_s[i]))
|
||||||
|
{
|
||||||
|
_frontLength = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else static if (is(Unqual!(ElementType!Range) == dchar) ||
|
||||||
|
is(Unqual!(ElementType!Range) == wchar))
|
||||||
|
{
|
||||||
|
// dchar or wchar range, we can just use find.
|
||||||
|
auto r = find!(isWhite)(_s.save);
|
||||||
_frontLength = _s.length - r.length;
|
_frontLength = _s.length - r.length;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// need to decode the characters until we find a space. This is
|
||||||
|
// ported from std.string.stripLeft.
|
||||||
|
static import std.ascii;
|
||||||
|
static import std.uni;
|
||||||
|
import std.utf : decodeFront;
|
||||||
|
|
||||||
|
auto input = _s.save;
|
||||||
|
size_t iLength = input.length;
|
||||||
|
|
||||||
|
while (!input.empty)
|
||||||
|
{
|
||||||
|
auto c = input.front;
|
||||||
|
if (std.ascii.isASCII(c))
|
||||||
|
{
|
||||||
|
if (std.ascii.isWhite(c))
|
||||||
|
break;
|
||||||
|
input.popFront();
|
||||||
|
--iLength;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto dc = decodeFront(input);
|
||||||
|
if (std.uni.isWhite(dc))
|
||||||
|
break;
|
||||||
|
iLength = input.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sanity check
|
||||||
|
assert(iLength <= _s.length);
|
||||||
|
|
||||||
|
_frontLength = _s.length - iLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
this(C[] s) pure @safe
|
this(Range s)
|
||||||
{
|
{
|
||||||
import std.string : strip;
|
import std.string : stripLeft;
|
||||||
_s = s.strip();
|
_s = s.stripLeft();
|
||||||
getFirst();
|
getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
@property C[] front() pure @safe
|
@property auto front()
|
||||||
{
|
{
|
||||||
version(assert) if (empty) throw new RangeError();
|
version(assert) if (empty) throw new RangeError();
|
||||||
return _s[0 .. _frontLength];
|
return _s[0 .. _frontLength];
|
||||||
}
|
}
|
||||||
|
|
||||||
void popFront() pure @safe
|
void popFront()
|
||||||
{
|
{
|
||||||
import std.string : stripLeft;
|
import std.string : stripLeft;
|
||||||
version(assert) if (empty) throw new RangeError();
|
version(assert) if (empty) throw new RangeError();
|
||||||
|
@ -5055,7 +5115,7 @@ if (isSomeChar!C)
|
||||||
getFirst();
|
getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
@property bool empty() const @safe pure nothrow
|
@property bool empty() const
|
||||||
{
|
{
|
||||||
return _s.empty;
|
return _s.empty;
|
||||||
}
|
}
|
||||||
|
@ -5083,7 +5143,7 @@ if (isSomeChar!C)
|
||||||
static foreach (S; AliasSeq!(string, wstring, dstring))
|
static foreach (S; AliasSeq!(string, wstring, dstring))
|
||||||
{{
|
{{
|
||||||
import std.conv : to;
|
import std.conv : to;
|
||||||
S a = " a bcd ef gh ";
|
S a = " a \u2028 bcd ef gh ";
|
||||||
assert(equal(splitter(a), [to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")]));
|
assert(equal(splitter(a), [to!S("a"), to!S("bcd"), to!S("ef"), to!S("gh")]));
|
||||||
a = "";
|
a = "";
|
||||||
assert(splitter(a).empty);
|
assert(splitter(a).empty);
|
||||||
|
@ -5119,6 +5179,56 @@ if (isSomeChar!C)
|
||||||
assert(dictionary["two"]== 2);
|
assert(dictionary["two"]== 2);
|
||||||
assert(dictionary["yah"]== 3);
|
assert(dictionary["yah"]== 3);
|
||||||
assert(dictionary["last"]== 4);
|
assert(dictionary["last"]== 4);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@safe unittest
|
||||||
|
{
|
||||||
|
// repeat the dictionary test with byCodeUnit ranges instead of strings
|
||||||
|
import std.conv : to;
|
||||||
|
import std.string : strip;
|
||||||
|
import std.utf : byCodeUnit;
|
||||||
|
|
||||||
|
alias BCU = typeof("abc".byCodeUnit());
|
||||||
|
|
||||||
|
// TDPL example, page 8
|
||||||
|
uint[BCU] dictionary;
|
||||||
|
BCU[3] lines;
|
||||||
|
lines[0] = "line one".byCodeUnit;
|
||||||
|
lines[1] = "line \ttwo".byCodeUnit;
|
||||||
|
lines[2] = "yah last line\ryah".byCodeUnit;
|
||||||
|
foreach (line; lines)
|
||||||
|
{
|
||||||
|
foreach (word; splitter(strip(line)))
|
||||||
|
{
|
||||||
|
static assert(is(typeof(word) == BCU));
|
||||||
|
if (word in dictionary) continue; // Nothing to do
|
||||||
|
auto newID = dictionary.length;
|
||||||
|
dictionary[word] = cast(uint) newID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(dictionary.length == 5);
|
||||||
|
assert(dictionary["line".byCodeUnit]== 0);
|
||||||
|
assert(dictionary["one".byCodeUnit]== 1);
|
||||||
|
assert(dictionary["two".byCodeUnit]== 2);
|
||||||
|
assert(dictionary["yah".byCodeUnit]== 3);
|
||||||
|
assert(dictionary["last".byCodeUnit]== 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
@safe pure unittest
|
||||||
|
{
|
||||||
|
// issue 19238
|
||||||
|
import std.utf : byCodeUnit;
|
||||||
|
import std.algorithm.comparison : equal;
|
||||||
|
auto range = "hello world".byCodeUnit.splitter;
|
||||||
|
static assert(is(typeof(range.front()) == typeof("hello".byCodeUnit())));
|
||||||
|
assert(range.equal(["hello".byCodeUnit, "world".byCodeUnit]));
|
||||||
|
|
||||||
|
// test other space types, including unicode
|
||||||
|
auto u = " a\t\v\r bcd\u3000 \u2028\t\nef\U00010001 gh";
|
||||||
|
assert(equal(splitter(u), ["a", "bcd", "ef\U00010001", "gh"][]));
|
||||||
|
assert(equal(splitter(u.byCodeUnit), ["a".byCodeUnit, "bcd".byCodeUnit,
|
||||||
|
"ef\U00010001".byCodeUnit, "gh".byCodeUnit][]));
|
||||||
}
|
}
|
||||||
|
|
||||||
@safe unittest
|
@safe unittest
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue