Fix Issue 15797: Add keepSeperator option to std.regex.splitter

This commit is contained in:
Jack Stouffer 2016-04-08 16:27:35 -04:00
parent b4f0478434
commit 02e11abd54

View file

@ -254,6 +254,7 @@ module std.regex;
import std.regex.internal.ir; import std.regex.internal.ir;
import std.regex.internal.thompson; //TODO: get rid of this dependency import std.regex.internal.thompson; //TODO: get rid of this dependency
import std.exception, std.traits, std.range; import std.exception, std.traits, std.range;
import std.typecons; // : Flag, Yes, No;
/++ /++
$(D Regex) object holds regular expression pattern in compiled form. $(D Regex) object holds regular expression pattern in compiled form.
@ -1327,11 +1328,17 @@ public R replace(alias fun, R, RegEx)(R input, RegEx re)
return replaceAllWith!(fun, match)(input, re); return replaceAllWith!(fun, match)(input, re);
} }
/++ /**
Range that splits a string using a regular expression as a Splits a string `r` using a regular expression `pat` as a separator.
separator.
+/ Params:
public struct Splitter(Range, alias RegEx = Regex) keepSeparators = flag to specify if the matches should be in the resulting range
r = the string to split
pat = the pattern to split on
Returns:
A lazy range of strings
*/
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if(isSomeString!Range && isRegexFor!(RegEx, Range)) if(isSomeString!Range && isRegexFor!(RegEx, Range))
{ {
private: private:
@ -1340,6 +1347,8 @@ private:
alias Rx = typeof(match(Range.init,RegEx.init)); alias Rx = typeof(match(Range.init,RegEx.init));
Rx _match; Rx _match;
static if (keepSeparators) bool onMatch = false;
@trusted this(Range input, RegEx separator) @trusted this(Range input, RegEx separator)
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
_input = input; _input = input;
@ -1352,6 +1361,10 @@ private:
else else
{ {
_match = Rx(_input, separator); _match = Rx(_input, separator);
static if (keepSeparators)
if (_match.pre.empty)
popFront();
} }
} }
@ -1368,13 +1381,27 @@ public:
assert(!empty && _offset <= _match.pre.length assert(!empty && _offset <= _match.pre.length
&& _match.pre.length <= _input.length); && _match.pre.length <= _input.length);
return _input[_offset .. min($, _match.pre.length)];
static if (keepSeparators)
{
if (!onMatch)
return _input[_offset .. min($, _match.pre.length)];
else
return _match.hit();
}
else
{
return _input[_offset .. min($, _match.pre.length)];
}
} }
///ditto ///ditto
@property bool empty() @property bool empty()
{ {
return _offset > _input.length; static if (keepSeparators)
return _offset >= _input.length;
else
return _offset > _input.length;
} }
///ditto ///ditto
@ -1388,9 +1415,27 @@ public:
} }
else else
{ {
//skip past the separator static if (keepSeparators)
_offset = _match.pre.length + _match.hit.length; {
_match.popFront(); if (!onMatch)
{
//skip past the separator
_offset = _match.pre.length;
}
else
{
_offset += _match.hit.length;
_match.popFront();
}
onMatch = !onMatch;
}
else
{
//skip past the separator
_offset = _match.pre.length + _match.hit.length;
_match.popFront();
}
} }
} }
@ -1401,14 +1446,12 @@ public:
} }
} }
/** /// ditto
A helper function, creates a $(D Splitter) on range $(D r) separated by regex $(D pat). public Splitter!(keepSeparators, Range, RegEx) splitter(
Captured subexpressions have no effect on the resulting range. Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat) if (
*/ is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
public Splitter!(Range, RegEx) splitter(Range, RegEx)(Range r, RegEx pat)
if(is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{ {
return Splitter!(Range, RegEx)(r, pat); return Splitter!(keepSeparators, Range, RegEx)(r, pat);
} }
/// ///
@ -1420,6 +1463,22 @@ unittest
["", "abc", "de", "fg", "hi", ""])); ["", "abc", "de", "fg", "hi", ""]));
} }
/// Split on a pattern, but keep the matches in the resulting range
unittest
{
import std.algorithm.comparison : equal;
auto pattern = regex(`([\.,])`);
assert("2003.04.05"
.splitter!(Yes.keepSeparators)(pattern)
.equal(["2003", ".", "04", ".", "05"]));
assert(",1,2,3"
.splitter!(Yes.keepSeparators)(pattern)
.equal([",", "1", ",", "2", ",", "3"]));
}
///An eager version of $(D splitter) that creates an array with splitted slices of $(D input). ///An eager version of $(D splitter) that creates an array with splitted slices of $(D input).
public @trusted String[] split(String, RegEx)(String input, RegEx rx) public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if(isSomeString!String && isRegexFor!(RegEx, String)) if(isSomeString!String && isRegexFor!(RegEx, String))