Add splitter functionality that allows keeping the sentinels (separators)

Signed-off-by: Robert Aron <aronrobert293@gmail.com>

Update the Returns: section of the DDoc

Fix whitespace
This commit is contained in:
Robert Aron 2020-08-01 12:43:52 +03:00 committed by The Dlang Bot
parent fb6267b6b3
commit d8f3d3a815

View file

@ -75,7 +75,7 @@ module std.algorithm.iteration;
import std.functional : unaryFun, binaryFun;
import std.range.primitives;
import std.traits;
import std.typecons : Flag;
import std.typecons : Flag, Yes, No;
private template aggregate(fun...)
if (fun.length >= 1)
@ -4824,6 +4824,7 @@ Params:
s = The element (or range) to be treated as the separator
between range segments to be split.
isTerminator = The predicate for deciding where to split the range when no separator is passed
keepSeparators = The flag for deciding if the separators are kept
Constraints:
The predicate `pred` needs to accept an element of `r` and the
@ -4836,6 +4837,9 @@ Returns:
the returned range will be likewise.
When a range is used as a separator, bidirectionality isn't possible.
If `keepSeparators` is equal to `Yes.keepSeparators`, the output will also contain the
separators.
If an empty range is given, the result is an empty range. If a range with
one separator is given, the result is a range with two empty elements.
@ -4844,7 +4848,10 @@ See_Also:
$(REF _split, std,array) for a version that splits eagerly and
$(LREF splitWhen), which compares adjacent elements instead of element against separator.
*/
auto splitter(alias pred = "a == b", Range, Separator)(Range r, Separator s)
auto splitter(alias pred = "a == b",
Flag!"keepSeparators" keepSeparators = No.keepSeparators,
Range,
Separator)(Range r, Separator s)
if (is(typeof(binaryFun!pred(r.front, s)) : bool)
&& ((hasSlicing!Range && hasLength!Range) || isNarrowString!Range))
{
@ -4870,6 +4877,11 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
enum _separatorLength = 1;
}
static if (keepSeparators)
{
bool _wasSeparator = true;
}
static if (isBidirectionalRange!Range)
{
size_t lastIndexOf(Range haystack, Separator needle)
@ -4911,10 +4923,27 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
@property Range front()
{
assert(!empty, "Attempting to fetch the front of an empty splitter.");
if (_frontLength == _unComputed)
static if (keepSeparators)
{
auto r = _input.find!pred(_separator);
_frontLength = _input.length - r.length;
if (!_wasSeparator)
{
_frontLength = _separatorLength;
_wasSeparator = true;
}
else if (_frontLength == _unComputed)
{
auto r = _input.find!pred(_separator);
_frontLength = _input.length - r.length;
_wasSeparator = false;
}
}
else
{
if (_frontLength == _unComputed)
{
auto r = _input.find!pred(_separator);
_frontLength = _input.length - r.length;
}
}
return _input[0 .. _frontLength];
}
@ -4928,18 +4957,35 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
}
assert(_frontLength <= _input.length, "The front position must"
~ " not exceed the input.length");
if (_frontLength == _input.length)
static if (keepSeparators)
{
// no more input and need to fetch => done
_frontLength = _atEnd;
if (_frontLength == _input.length && !_wasSeparator)
{
_frontLength = _atEnd;
// Probably don't need this, but just for consistency:
_backLength = _atEnd;
_backLength = _atEnd;
}
else
{
_input = _input[_frontLength .. _input.length];
_frontLength = _unComputed;
}
}
else
{
_input = _input[_frontLength + _separatorLength .. _input.length];
_frontLength = _unComputed;
if (_frontLength == _input.length)
{
// no more input and need to fetch => done
_frontLength = _atEnd;
// Probably don't need this, but just for consistency:
_backLength = _atEnd;
}
else
{
_input = _input[_frontLength + _separatorLength .. _input.length];
_frontLength = _unComputed;
}
}
}
@ -4958,16 +5004,40 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
@property Range back()
{
assert(!empty, "Attempting to fetch the back of an empty splitter.");
if (_backLength == _unComputed)
static if (keepSeparators)
{
immutable lastIndex = lastIndexOf(_input, _separator);
if (lastIndex == -1)
if (!_wasSeparator)
{
_backLength = _input.length;
_backLength = _separatorLength;
_wasSeparator = true;
}
else
else if (_backLength == _unComputed)
{
_backLength = _input.length - lastIndex - 1;
immutable lastIndex = lastIndexOf(_input, _separator);
if (lastIndex == -1)
{
_backLength = _input.length;
}
else
{
_backLength = _input.length - lastIndex - 1;
}
_wasSeparator = false;
}
}
else
{
if (_backLength == _unComputed)
{
immutable lastIndex = lastIndexOf(_input, _separator);
if (lastIndex == -1)
{
_backLength = _input.length;
}
else
{
_backLength = _input.length - lastIndex - 1;
}
}
}
return _input[_input.length - _backLength .. _input.length];
@ -4983,16 +5053,32 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
}
assert(_backLength <= _input.length, "The end index must not"
~ " exceed the length of the input");
if (_backLength == _input.length)
static if (keepSeparators)
{
// no more input and need to fetch => done
_frontLength = _atEnd;
_backLength = _atEnd;
if (_backLength == _input.length && !_wasSeparator)
{
_frontLength = _atEnd;
_backLength = _atEnd;
}
else
{
_input = _input[0 .. _input.length - _backLength];
_backLength = _unComputed;
}
}
else
{
_input = _input[0 .. _input.length - _backLength - _separatorLength];
_backLength = _unComputed;
if (_backLength == _input.length)
{
// no more input and need to fetch => done
_frontLength = _atEnd;
_backLength = _atEnd;
}
else
{
_input = _input[0 .. _input.length - _backLength - _separatorLength];
_backLength = _unComputed;
}
}
}
}
@ -5013,6 +5099,20 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert(a.splitter(0).equal(w));
}
/// Basic splitting with characters and numbers, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
// With Yes.keepSeparators each separator is yielded as its own element,
// placed between the two segments it delimits.
assert("a|bc|def".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "a", "|", "bc", "|", "def" ]));
// The same holds for arrays: every 0 comes back as a one-element slice.
int[] a = [1, 0, 2, 3, 0, 4, 5, 6];
int[][] w = [ [1], [0], [2, 3], [0], [4, 5, 6] ];
assert(a.splitter!("a == b", Yes.keepSeparators)(0).equal(w));
}
/// Adjacent separators.
@safe unittest
{
@ -5029,6 +5129,27 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert(a.splitter(0).equal(w));
}
/// Adjacent separators, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
// Leading and trailing separators produce empty first/last segments.
assert("|ab|".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "", "|", "ab", "|", "" ]));
// No separator in the input: the whole input is the only element.
assert("ab".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "ab" ]));
// Two adjacent separators enclose an empty segment.
assert("a|b||c".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "a", "|", "b", "|", "", "|", "c" ]));
// The input contains TWO consecutive spaces; the empty segment between
// them is what yields the "" element in the expected result.
assert("hello  world".splitter!("a == b", Yes.keepSeparators)(' ')
.equal([ "hello", " ", "", " ", "world" ]));
// Same rules for arrays, including the empty segment after a trailing 0.
auto a = [ 1, 2, 0, 0, 3, 0, 4, 5, 0 ];
auto w = [ [1, 2], [0], [], [0], [3], [0], [4, 5], [0], [] ];
assert(a.splitter!("a == b", Yes.keepSeparators)(0).equal(w));
}
/// Empty and separator-only ranges.
@safe unittest
{
@ -5040,6 +5161,20 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert("||".splitter('|').equal([ "", "", "" ]));
}
/// Empty and separator-only ranges, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
import std.range : empty;
// An empty input yields an empty result even when separators are kept.
assert("".splitter!("a == b", Yes.keepSeparators)('|').empty);
// A lone separator is surrounded by two empty segments.
assert("|".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "", "|", "" ]));
// N separators give N + 1 empty segments interleaved with the separators.
assert("||".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "", "|", "", "|", "" ]));
}
/// Use a range for splitting
@safe unittest
{
@ -5060,6 +5195,32 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert(a.splitter([0, 0]).equal([ [], [1] ]));
}
/// Use a range for splitting, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
// A multi-element separator is returned whole, as a single element.
assert("a=>bc=>def".splitter!("a == b", Yes.keepSeparators)("=>")
.equal([ "a", "=>", "bc", "=>", "def" ]));
// A single '|' does not match the "||" separator and stays in its segment.
assert("a|b||c".splitter!("a == b", Yes.keepSeparators)("||")
.equal([ "a|b", "||", "c" ]));
assert("hello world".splitter!("a == b", Yes.keepSeparators)(" ")
.equal([ "hello", " ", "world" ]));
// Only the full [0, 0] run splits the array; isolated zeros remain in
// the trailing segment.
int[] a = [ 1, 2, 0, 0, 3, 0, 4, 5, 0 ];
int[][] w = [ [1, 2], [0, 0], [3, 0, 4, 5, 0] ];
assert(a.splitter!("a == b", Yes.keepSeparators)([0, 0]).equal(w));
// Input equal to the separator: empty segments on both sides of it.
a = [ 0, 0 ];
assert(a.splitter!("a == b", Yes.keepSeparators)([0, 0])
.equal([ (int[]).init, [0, 0], (int[]).init ]));
// Leading separator: an empty first segment precedes it.
a = [ 0, 0, 1 ];
assert(a.splitter!("a == b", Yes.keepSeparators)([0, 0])
.equal([ [], [0, 0], [1] ]));
}
/// Custom predicate functions.
@safe unittest
{
@ -5073,6 +5234,21 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert(w.splitter!"a.front == b"(1).equal([ [[0]], [[2]] ]));
}
/// Custom predicate functions, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
import std.ascii : toLower;
// The predicate decides what counts as a separator; the kept separator is
// the matching slice of the input, so both "X" and "x" appear as written.
assert("abXcdxef".splitter!("a.toLower == b", Yes.keepSeparators)('x')
.equal([ "ab", "X", "cd", "x", "ef" ]));
// The predicate may inspect the element: here the element whose front
// equals 1 acts as the separator and is kept as [[1]].
auto w = [ [0], [1], [2] ];
assert(w.splitter!("a.front == b", Yes.keepSeparators)(1)
.equal([ [[0]], [[1]], [[2]] ]));
}
/// Use splitter without a separator
@safe unittest
{
@ -5105,6 +5281,18 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert("ab".splitter('|').equal([ "ab" ]));
}
/// Leading separators, trailing separators, or no separators, keeping the separators.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
// Leading and trailing separators are kept and are bracketed by empty segments.
assert("|ab|".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "", "|", "ab", "|", "" ]));
// Without any separator the input is returned as the single element.
assert("ab".splitter!("a == b", Yes.keepSeparators)('|')
.equal([ "ab" ]));
}
/// Splitter returns bidirectional ranges if the delimiter is a single element
@safe unittest
{
@ -5113,6 +5301,16 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
assert("a|bc|def".splitter('|').retro.equal([ "def", "bc", "a" ]));
}
/// Splitter returns bidirectional ranges if the delimiter is a single element,
/// also when the separators are kept.
@safe unittest
{
import std.algorithm.comparison : equal;
import std.typecons : Yes;
import std.range : retro;
// Iterating from the back yields segments and separators in reverse order.
assert("a|bc|def".splitter!("a == b", Yes.keepSeparators)('|')
.retro.equal([ "def", "|", "bc", "|", "a" ]));
}
/// Splitting by word lazily
@safe unittest
{
@ -5221,7 +5419,10 @@ if (is(typeof(binaryFun!pred(r.front, s)) : bool)
}
/// ditto
auto splitter(alias pred = "a == b", Range, Separator)(Range r, Separator s)
auto splitter(alias pred = "a == b",
Flag!"keepSeparators" keepSeparators = No.keepSeparators,
Range,
Separator)(Range r, Separator s)
if (is(typeof(binaryFun!pred(r.front, s.front)) : bool)
&& (hasSlicing!Range || isNarrowString!Range)
&& isForwardRange!Separator
@ -5238,15 +5439,38 @@ if (is(typeof(binaryFun!pred(r.front, s.front)) : bool)
// _frontLength == size_t.max means empty
size_t _frontLength = size_t.max;
static if (keepSeparators)
{
bool _wasSeparator = true;
}
@property auto separatorLength() { return _separator.length; }
void ensureFrontLength()
{
if (_frontLength != _frontLength.max) return;
assert(!_input.empty, "The input must not be empty");
// compute front length
_frontLength = (_separator.empty) ? 1 :
static if (keepSeparators)
{
assert(!_input.empty || _wasSeparator, "The input must not be empty");
if (_wasSeparator)
{
_frontLength = _input.length -
find!pred(_input, _separator).length;
_wasSeparator = false;
}
else
{
_frontLength = separatorLength();
_wasSeparator = true;
}
}
else
{
assert(!_input.empty, "The input must not be empty");
// compute front length
_frontLength = (_separator.empty) ? 1 :
_input.length - find!pred(_input, _separator).length;
}
}
public:
@ -5271,7 +5495,14 @@ if (is(typeof(binaryFun!pred(r.front, s.front)) : bool)
{
@property bool empty()
{
return _frontLength == size_t.max && _input.empty;
static if (keepSeparators)
{
return _frontLength == size_t.max && _input.empty && !_wasSeparator;
}
else
{
return _frontLength == size_t.max && _input.empty;
}
}
}
@ -5279,24 +5510,32 @@ if (is(typeof(binaryFun!pred(r.front, s.front)) : bool)
{
assert(!empty, "Attempting to popFront an empty splitter.");
ensureFrontLength();
if (_frontLength == _input.length)
static if (keepSeparators)
{
// done, there's no separator in sight
_input = _input[_frontLength .. _frontLength];
_frontLength = _frontLength.max;
return;
_input = _input[_frontLength .. _input.length];
}
if (_frontLength + separatorLength == _input.length)
else
{
// Special case: popping the first-to-last item; there is
// an empty item right after this.
_input = _input[_input.length .. _input.length];
_frontLength = 0;
return;
if (_frontLength == _input.length)
{
// done, there's no separator in sight
_input = _input[_frontLength .. _frontLength];
_frontLength = _frontLength.max;
return;
}
if (_frontLength + separatorLength == _input.length)
{
// Special case: popping the first-to-last item; there is
// an empty item right after this.
_input = _input[_input.length .. _input.length];
_frontLength = 0;
return;
}
// Normal case, pop one item and the separator, get ready for
// reading the next item
_input = _input[_frontLength + separatorLength .. _input.length];
}
// Normal case, pop one item and the separator, get ready for
// reading the next item
_input = _input[_frontLength + separatorLength .. _input.length];
// mark _frontLength as uninitialized
_frontLength = _frontLength.max;
}