mirror of
https://github.com/dlang/phobos.git
synced 2025-05-01 15:40:36 +03:00
Merge pull request #462 from blackwhale/regex-fixes
fix Issue 7300 - std.regex.ShiftOr!dchar.search is broken
This commit is contained in:
commit
84a847cbe1
5 changed files with 162 additions and 98 deletions
|
@ -2,14 +2,17 @@ $(VERSION 059, ddd mm, 2012, =================================================,
|
||||||
|
|
||||||
$(LIBBUGSFIXED
|
$(LIBBUGSFIXED
|
||||||
$(LI $(BUGZILLA 4604): A stack overflow with writeln)
|
$(LI $(BUGZILLA 4604): A stack overflow with writeln)
|
||||||
|
$(LI $(BUGZILLA 5523): std.regex handles "\s" and "\W" (etc.) inside square brackets improperly)
|
||||||
$(LI $(BUGZILLA 5674): AssertError in std.regex)
|
$(LI $(BUGZILLA 5674): AssertError in std.regex)
|
||||||
$(LI $(BUGZILLA 5652): Add \p and \P unicode properties to std.regex)
|
$(LI $(BUGZILLA 5652): Add \p and \P unicode properties to std.regex)
|
||||||
$(LI $(BUGZILLA 5964): std.stdio.readln can throw a UnicodeException)
|
$(LI $(BUGZILLA 5964): std.stdio.readln can throw a UnicodeException)
|
||||||
|
$(LI $(BUGZILLA 6217): [GSOC] result of std.algorithm.map is not movable)
|
||||||
$(LI $(BUGZILLA 6403): Upgrade std.regex to Unicode UTS #18 Level 1 support)
|
$(LI $(BUGZILLA 6403): Upgrade std.regex to Unicode UTS #18 Level 1 support)
|
||||||
$(LI $(BUGZILLA 7111): New regex engine cannot match beginning of empty string)
|
$(LI $(BUGZILLA 7111): New regex engine cannot match beginning of empty string)
|
||||||
$(LI $(BUGZILLA 7138): Can't call array() on dirEntries)
|
$(LI $(BUGZILLA 7138): Can't call array() on dirEntries)
|
||||||
$(LI $(BUGZILLA 7264): Can't iterate result from 4-arg dirEntries as string)
|
$(LI $(BUGZILLA 7264): Can't iterate result from 4-arg dirEntries as string)
|
||||||
$(LI $(BUGZILLA 7299): std.uni missing doc comments)
|
$(LI $(BUGZILLA 7299): std.uni missing doc comments)
|
||||||
|
$(LI $(BUGZILLA 7300): std.regex.ShiftOr!dchar.search is broken)
|
||||||
$(LI $(BUGZILLA 7374): stdin.byLine() throws AssertError on empty input)
|
$(LI $(BUGZILLA 7374): stdin.byLine() throws AssertError on empty input)
|
||||||
$(LI $(BUGZILLA 7628): std.format formatValue incorrect overload)
|
$(LI $(BUGZILLA 7628): std.format formatValue incorrect overload)
|
||||||
$(LI $(BUGZILLA 7674): regex replace requires escaped format)
|
$(LI $(BUGZILLA 7674): regex replace requires escaped format)
|
||||||
|
|
|
@ -1406,7 +1406,7 @@ unittest
|
||||||
/// Ditto
|
/// Ditto
|
||||||
T move(T)(ref T src)
|
T move(T)(ref T src)
|
||||||
{
|
{
|
||||||
T result;
|
T result=void;
|
||||||
move(src, result);
|
move(src, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -8771,3 +8771,9 @@ unittest
|
||||||
//writeln(b[0]);
|
//writeln(b[0]);
|
||||||
assert(b[0] == tuple(4.0, 2u));
|
assert(b[0] == tuple(4.0, 2u));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unittest//Issue 6217
|
||||||
|
{
|
||||||
|
auto x = map!"a"([1,2,3]);
|
||||||
|
x = move(x);
|
||||||
|
}
|
||||||
|
|
|
@ -58,16 +58,6 @@ body
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//ditto
|
|
||||||
@trusted void moveAllAlt(T)(T[] src, T[] dest)
|
|
||||||
{//moveAll is @system
|
|
||||||
if(__ctfe)
|
|
||||||
foreach(i,v; src)
|
|
||||||
dest[i] = v;
|
|
||||||
else
|
|
||||||
moveAll(src, dest);
|
|
||||||
}
|
|
||||||
|
|
||||||
//$(D Interval) represents an interval of codepoints: [a,b).
|
//$(D Interval) represents an interval of codepoints: [a,b).
|
||||||
struct Interval
|
struct Interval
|
||||||
{
|
{
|
||||||
|
|
56
std/range.d
56
std/range.d
|
@ -5932,6 +5932,27 @@ unittest {
|
||||||
assert(equal(app.data, [1,2,3]));
|
assert(equal(app.data, [1,2,3]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Returns true if $(D fn) accepts variables of type T1 and T2 in any order.
|
||||||
|
The following code should compile:
|
||||||
|
---
|
||||||
|
T1 t1;
|
||||||
|
T2 t2;
|
||||||
|
fn(t1, t2);
|
||||||
|
fn(t2, t1);
|
||||||
|
---
|
||||||
|
*/
|
||||||
|
template isTwoWayCompatible(alias fn, T1, T2)
|
||||||
|
{
|
||||||
|
enum isTwoWayCompatible = is(typeof( (){
|
||||||
|
T1 e;
|
||||||
|
T2 v;
|
||||||
|
return fn(v,e) && fn(e,v);
|
||||||
|
}
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Policy used with the searching primitives $(D lowerBound), $(D
|
Policy used with the searching primitives $(D lowerBound), $(D
|
||||||
upperBound), and $(D equalRange) of $(LREF SortedRange) below.
|
upperBound), and $(D equalRange) of $(LREF SortedRange) below.
|
||||||
|
@ -6233,10 +6254,9 @@ if (isRandomAccessRange!Range)
|
||||||
----
|
----
|
||||||
*/
|
*/
|
||||||
auto lowerBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
auto lowerBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
||||||
if (is(V : ElementType!Range))
|
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||||
{
|
{
|
||||||
ElementType!Range v = value;
|
return this[0 .. getTransitionIndex!(sp, geq)(value)];
|
||||||
return this[0 .. getTransitionIndex!(sp, geq)(v)];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// upperBound
|
// upperBound
|
||||||
|
@ -6257,10 +6277,9 @@ if (isRandomAccessRange!Range)
|
||||||
----
|
----
|
||||||
*/
|
*/
|
||||||
auto upperBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
auto upperBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
||||||
if (is(V : ElementType!Range))
|
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||||
{
|
{
|
||||||
ElementType!Range v = value;
|
return this[getTransitionIndex!(sp, gt)(value) .. length];
|
||||||
return this[getTransitionIndex!(sp, gt)(v) .. length];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// equalRange
|
// equalRange
|
||||||
|
@ -6284,7 +6303,8 @@ if (isRandomAccessRange!Range)
|
||||||
assert(equal(r, [ 3, 3, 3 ]));
|
assert(equal(r, [ 3, 3, 3 ]));
|
||||||
----
|
----
|
||||||
*/
|
*/
|
||||||
auto equalRange(V)(V value) if (is(V : ElementType!Range))
|
auto equalRange(V)(V value)
|
||||||
|
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||||
{
|
{
|
||||||
size_t first = 0, count = _input.length;
|
size_t first = 0, count = _input.length;
|
||||||
while (count > 0)
|
while (count > 0)
|
||||||
|
@ -6339,7 +6359,8 @@ assert(equal(r[1], [ 3, 3, 3 ]));
|
||||||
assert(equal(r[2], [ 4, 4, 5, 6 ]));
|
assert(equal(r[2], [ 4, 4, 5, 6 ]));
|
||||||
----
|
----
|
||||||
*/
|
*/
|
||||||
auto trisect(V)(V value) if (is(V : ElementType!Range))
|
auto trisect(V)(V value)
|
||||||
|
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||||
{
|
{
|
||||||
size_t first = 0, count = _input.length;
|
size_t first = 0, count = _input.length;
|
||||||
while (count > 0)
|
while (count > 0)
|
||||||
|
@ -6445,6 +6466,19 @@ unittest
|
||||||
assert(equal(r[2], [ 40, 40, 50, 60 ]));
|
assert(equal(r[2], [ 40, 40, 50, 60 ]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
auto a = [ "A", "AG", "B", "E", "F" ];
|
||||||
|
auto r = assumeSorted!"cmp(a,b) < 0"(a).trisect("B"w);
|
||||||
|
assert(equal(r[0], [ "A", "AG" ]));
|
||||||
|
assert(equal(r[1], [ "B" ]));
|
||||||
|
assert(equal(r[2], [ "E", "F" ]));
|
||||||
|
r = assumeSorted!"cmp(a,b) < 0"(a).trisect("A"d);
|
||||||
|
assert(r[0].empty);
|
||||||
|
assert(equal(r[1], [ "A" ]));
|
||||||
|
assert(equal(r[2], [ "AG", "B", "E", "F" ]));
|
||||||
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
{
|
{
|
||||||
static void test(SearchPolicy pol)()
|
static void test(SearchPolicy pol)()
|
||||||
|
@ -6536,6 +6570,8 @@ unittest
|
||||||
assert(equal(p, [0, 1, 2, 3, 4]));
|
assert(equal(p, [0, 1, 2, 3, 4]));
|
||||||
p = assumeSorted(a).lowerBound(6);
|
p = assumeSorted(a).lowerBound(6);
|
||||||
assert(equal(p, [ 0, 1, 2, 3, 4, 5]));
|
assert(equal(p, [ 0, 1, 2, 3, 4, 5]));
|
||||||
|
p = assumeSorted(a).lowerBound(6.9);
|
||||||
|
assert(equal(p, [ 0, 1, 2, 3, 4, 5, 6]));
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
@ -6543,6 +6579,8 @@ unittest
|
||||||
int[] a = [ 1, 2, 3, 3, 3, 4, 4, 5, 6 ];
|
int[] a = [ 1, 2, 3, 3, 3, 4, 4, 5, 6 ];
|
||||||
auto p = assumeSorted(a).upperBound(3);
|
auto p = assumeSorted(a).upperBound(3);
|
||||||
assert(equal(p, [4, 4, 5, 6 ]));
|
assert(equal(p, [4, 4, 5, 6 ]));
|
||||||
|
p = assumeSorted(a).upperBound(4.2);
|
||||||
|
assert(equal(p, [ 5, 6 ]));
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
@ -6558,6 +6596,8 @@ unittest
|
||||||
assert(p.empty);
|
assert(p.empty);
|
||||||
p = assumeSorted(a).equalRange(7);
|
p = assumeSorted(a).equalRange(7);
|
||||||
assert(p.empty);
|
assert(p.empty);
|
||||||
|
p = assumeSorted(a).equalRange(3.0);
|
||||||
|
assert(equal(p, [ 3, 3, 3]));
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
|
137
std/regex.d
137
std/regex.d
|
@ -774,22 +774,12 @@ auto memoizeExpr(string expr)()
|
||||||
s.add(Interval(0,0x7f));
|
s.add(Interval(0,0x7f));
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
version(fred_perfect_hashing)
|
auto range = assumeSorted!((x,y) => ucmp(x.name, y.name) < 0)(unicodeProperties);
|
||||||
{
|
|
||||||
uint key = phash(name);
|
|
||||||
if(key >= PHASHNKEYS || ucmp(name,unicodeProperties[key].name) != 0)
|
|
||||||
enforce(0, "invalid property name");
|
|
||||||
s = cast(CodepointSet)unicodeProperties[key].set;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto range = assumeSorted!((x,y){ return ucmp(x.name, y.name) < 0; })(unicodeProperties);
|
|
||||||
//creating empty Codepointset is a workaround
|
//creating empty Codepointset is a workaround
|
||||||
auto eq = range.lowerBound(UnicodeProperty(cast(string)name,CodepointSet.init)).length;
|
auto eq = range.lowerBound(UnicodeProperty(cast(string)name,CodepointSet.init)).length;
|
||||||
enforce(eq!=range.length && ucmp(name,range[eq].name)==0,"invalid property name");
|
enforce(eq!=range.length && ucmp(name,range[eq].name)==0,"invalid property name");
|
||||||
s = range[eq].set.dup;
|
s = range[eq].set.dup;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if(casefold)
|
if(casefold)
|
||||||
s = caseEnclose(s);
|
s = caseEnclose(s);
|
||||||
|
@ -873,15 +863,12 @@ struct Parser(R, bool CTFE=false)
|
||||||
if(isSomeString!S)
|
if(isSomeString!S)
|
||||||
{
|
{
|
||||||
pat = origin = pattern;
|
pat = origin = pattern;
|
||||||
|
//reserve slightly more then avg as sampled from unittests
|
||||||
if(!__ctfe)
|
if(!__ctfe)
|
||||||
ir.reserve(pat.length);
|
ir.reserve((pat.length*5+2)/4);
|
||||||
parseFlags(flags);
|
parseFlags(flags);
|
||||||
_current = ' ';//a safe default for freeform parsing
|
_current = ' ';//a safe default for freeform parsing
|
||||||
next();
|
next();
|
||||||
if(__ctfe)
|
|
||||||
parseRegex();
|
|
||||||
else
|
|
||||||
{
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parseRegex();
|
parseRegex();
|
||||||
|
@ -890,7 +877,6 @@ struct Parser(R, bool CTFE=false)
|
||||||
{
|
{
|
||||||
error(e.msg);//also adds pattern location
|
error(e.msg);//also adds pattern location
|
||||||
}
|
}
|
||||||
}
|
|
||||||
put(Bytecode(IR.End, 0));
|
put(Bytecode(IR.End, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -911,10 +897,8 @@ struct Parser(R, bool CTFE=false)
|
||||||
empty = true;
|
empty = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//for CTFEability
|
_current = pat.front;
|
||||||
size_t idx=0;
|
pat.popFront();
|
||||||
_current = decode(pat, idx);
|
|
||||||
pat = pat[idx..$];
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1250,7 +1234,7 @@ struct Parser(R, bool CTFE=false)
|
||||||
default:
|
default:
|
||||||
if(replace)
|
if(replace)
|
||||||
{
|
{
|
||||||
moveAllAlt(ir[offset+1..$],ir[offset..$-1]);
|
moveAll(ir[offset+1..$],ir[offset..$-1]);
|
||||||
ir.length -= 1;
|
ir.length -= 1;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -1298,17 +1282,10 @@ struct Parser(R, bool CTFE=false)
|
||||||
counterDepth = std.algorithm.max(counterDepth, nesting+1);
|
counterDepth = std.algorithm.max(counterDepth, nesting+1);
|
||||||
}
|
}
|
||||||
else if(replace)
|
else if(replace)
|
||||||
{
|
|
||||||
if(__ctfe)//CTFE workaround: no moveAll and length -= x;
|
|
||||||
{
|
|
||||||
ir = ir[0..offset] ~ ir[offset+1..$];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
moveAll(ir[offset+1 .. $],ir[offset .. $-1]);
|
moveAll(ir[offset+1 .. $],ir[offset .. $-1]);
|
||||||
ir.length -= 1;
|
ir.length -= 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
put(Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len));
|
put(Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len));
|
||||||
enforce(ir.length + len < maxCompiledLength, "maximum compiled pattern length is exceeded");
|
enforce(ir.length + len < maxCompiledLength, "maximum compiled pattern length is exceeded");
|
||||||
ir ~= ir[offset .. offset+len];
|
ir ~= ir[offset .. offset+len];
|
||||||
|
@ -2162,13 +2139,8 @@ private:
|
||||||
//
|
//
|
||||||
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
|
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
|
||||||
{//equal is @system?
|
{//equal is @system?
|
||||||
//@@@BUG@@@ assumeSorted kills "-inline"
|
auto fnd = assumeSorted!"cmp(a,b) < 0"(map!"a.name"(dict)).lowerBound(name).length;
|
||||||
//auto fnd = assumeSorted(map!"a.name"(dict)).lowerBound(name).length;
|
enforce(equal(dict[fnd].name, name), text("no submatch named ", name));
|
||||||
uint fnd;
|
|
||||||
for(fnd = 0; fnd<dict.length; fnd++)
|
|
||||||
if(equal(dict[fnd].name,name))
|
|
||||||
break;
|
|
||||||
enforce(fnd < dict.length, text("no submatch named ", name));
|
|
||||||
return dict[fnd].group;
|
return dict[fnd].group;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2766,7 +2738,7 @@ public:
|
||||||
// returns only valid UTF indexes
|
// returns only valid UTF indexes
|
||||||
// (that given the haystack in question is valid UTF string)
|
// (that given the haystack in question is valid UTF string)
|
||||||
@trusted size_t search(const(Char)[] haystack, size_t idx)
|
@trusted size_t search(const(Char)[] haystack, size_t idx)
|
||||||
{
|
{//@BUG: apparently assumes little endian machines
|
||||||
assert(!empty);
|
assert(!empty);
|
||||||
auto p = cast(const(ubyte)*)(haystack.ptr+idx);
|
auto p = cast(const(ubyte)*)(haystack.ptr+idx);
|
||||||
uint state = uint.max;
|
uint state = uint.max;
|
||||||
|
@ -2779,9 +2751,10 @@ public:
|
||||||
while(p != end)
|
while(p != end)
|
||||||
{
|
{
|
||||||
if(!~state)
|
if(!~state)
|
||||||
{
|
{//speed up seeking first matching place
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
|
assert(p <= end, text(p," vs ", end));
|
||||||
p = cast(ubyte*)memchr(p, fChar, end - p);
|
p = cast(ubyte*)memchr(p, fChar, end - p);
|
||||||
if(!p)
|
if(!p)
|
||||||
return haystack.length;
|
return haystack.length;
|
||||||
|
@ -2796,31 +2769,40 @@ public:
|
||||||
{
|
{
|
||||||
state = (state<<1) | table[p[1]];
|
state = (state<<1) | table[p[1]];
|
||||||
state = (state<<1) | table[p[2]];
|
state = (state<<1) | table[p[2]];
|
||||||
p += 3;
|
p += 4;
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
//first char is already tested, see if that's all
|
p++;
|
||||||
if(!(state & limit))//division rounds down for dchar
|
//first char is tested, see if that's all
|
||||||
|
if(!(state & limit))
|
||||||
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
|
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
|
||||||
-length+1;
|
-length;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{//have some bits/states for possible matches,
|
||||||
|
//use the usual shift-or cycle
|
||||||
static if(charSize == 3)
|
static if(charSize == 3)
|
||||||
{
|
{
|
||||||
|
state = (state<<1) | table[p[0]];
|
||||||
state = (state<<1) | table[p[1]];
|
state = (state<<1) | table[p[1]];
|
||||||
state = (state<<1) | table[p[2]];
|
state = (state<<1) | table[p[2]];
|
||||||
state = (state<<1) | table[p[3]];
|
|
||||||
p+=4;
|
p+=4;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
state = (state<<1) | table[p[1]];
|
state = (state<<1) | table[p[0]];
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
|
if(!(state & limit))
|
||||||
|
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
|
||||||
|
-length;
|
||||||
|
}
|
||||||
debug(fred_search) writefln("State: %32b", state);
|
debug(fred_search) writefln("State: %32b", state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//in this path we have to shift first
|
//normal path, partially unrolled for char/wchar
|
||||||
static if(charSize == 3)
|
static if(charSize == 3)
|
||||||
{
|
{
|
||||||
const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);
|
const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);
|
||||||
|
@ -4870,8 +4852,6 @@ enum OneShot { Fwd, Bwd };
|
||||||
if(is(Char : dchar))
|
if(is(Char : dchar))
|
||||||
{
|
{
|
||||||
alias Stream.DataIndex DataIndex;
|
alias Stream.DataIndex DataIndex;
|
||||||
alias const(Char)[] String;
|
|
||||||
enum threadAllocSize = 16;
|
|
||||||
Thread!DataIndex* freelist;
|
Thread!DataIndex* freelist;
|
||||||
ThreadList!DataIndex clist, nlist;
|
ThreadList!DataIndex clist, nlist;
|
||||||
DataIndex[] merge;
|
DataIndex[] merge;
|
||||||
|
@ -4978,7 +4958,6 @@ enum OneShot { Fwd, Bwd };
|
||||||
writeln("------------------------------------------");
|
writeln("------------------------------------------");
|
||||||
if(exhausted)
|
if(exhausted)
|
||||||
{
|
{
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if(re.flags & RegexInfo.oneShot)
|
if(re.flags & RegexInfo.oneShot)
|
||||||
|
@ -5039,8 +5018,7 @@ enum OneShot { Fwd, Bwd };
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
exhausted = true;
|
|
||||||
genCounter++; //increment also on each end
|
genCounter++; //increment also on each end
|
||||||
debug(fred_matching) writefln("Threaded matching threads at end");
|
debug(fred_matching) writefln("Threaded matching threads at end");
|
||||||
//try out all zero-width posibilities
|
//try out all zero-width posibilities
|
||||||
|
@ -5050,8 +5028,17 @@ enum OneShot { Fwd, Bwd };
|
||||||
}
|
}
|
||||||
if(!matched)
|
if(!matched)
|
||||||
eval!false(createStart(index), matches);//new thread starting at end of input
|
eval!false(createStart(index), matches);//new thread starting at end of input
|
||||||
if(matched && !(re.flags & RegexOption.global))
|
if(matched)
|
||||||
exhausted = true;
|
{//in case NFA found match along the way
|
||||||
|
//and last possible longer alternative ultimately failed
|
||||||
|
s.reset(matches[0].end);//reset to last successful match
|
||||||
|
next();//and reload front character
|
||||||
|
//--- here the exact state of stream was restored ---
|
||||||
|
exhausted = atEnd || !(re.flags & RegexOption.global);
|
||||||
|
//+ empty match advances the input
|
||||||
|
if(!exhausted && matches[0].begin == matches[0].end)
|
||||||
|
next();
|
||||||
|
}
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6278,6 +6265,24 @@ public:
|
||||||
@property ref captures(){ return this; }
|
@property ref captures(){ return this; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unittest//verify example
|
||||||
|
{
|
||||||
|
auto m = match("@abc#", regex(`(\w)(\w)(\w)`));
|
||||||
|
auto c = m.captures;
|
||||||
|
assert(c.pre == "@");// part of input preceeding match
|
||||||
|
assert(c.post == "#"); // immediately after match
|
||||||
|
assert(c.hit == c[0] && c.hit == "abc");// the whole match
|
||||||
|
assert(c[2] =="b");
|
||||||
|
assert(c.front == "abc");
|
||||||
|
c.popFront();
|
||||||
|
assert(c.front == "a");
|
||||||
|
assert(c.back == "c");
|
||||||
|
c.popBack();
|
||||||
|
assert(c.back == "b");
|
||||||
|
popFrontN(c, 2);
|
||||||
|
assert(c.empty);
|
||||||
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
A regex engine state, as returned by $(D match) family of functions.
|
A regex engine state, as returned by $(D match) family of functions.
|
||||||
|
|
||||||
|
@ -6397,9 +6402,19 @@ public:
|
||||||
|
|
||||||
Throws: $(D RegexException) if there were any errors during compilation.
|
Throws: $(D RegexException) if there were any errors during compilation.
|
||||||
+/
|
+/
|
||||||
public auto regex(S)(S pattern, const(char)[] flags="")
|
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
|
||||||
if(isSomeString!(S))
|
if(isSomeString!(S))
|
||||||
{
|
{
|
||||||
|
enum cacheSize = 8; //TODO: invent nice interface to control regex caching
|
||||||
|
if(__ctfe)
|
||||||
|
return regexImpl(pattern, flags);
|
||||||
|
return memoize!(regexImpl!S, cacheSize)(pattern, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
public auto regexImpl(S)(S pattern, const(char)[] flags="")
|
||||||
|
if(isSomeString!(S))
|
||||||
|
{
|
||||||
|
alias Regex!(BasicElementOf!S) Reg;
|
||||||
if(!__ctfe)
|
if(!__ctfe)
|
||||||
{
|
{
|
||||||
auto parser = Parser!(Unqual!(typeof(pattern)))(pattern, flags);
|
auto parser = Parser!(Unqual!(typeof(pattern)))(pattern, flags);
|
||||||
|
@ -7228,6 +7243,7 @@ unittest
|
||||||
run_tests!match(); //thompson VM
|
run_tests!match(); //thompson VM
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
version(fred_ct)
|
version(fred_ct)
|
||||||
{
|
{
|
||||||
unittest
|
unittest
|
||||||
|
@ -7424,6 +7440,11 @@ else
|
||||||
if(ch != '-') //'--' is an operator
|
if(ch != '-') //'--' is an operator
|
||||||
assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
|
assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
|
||||||
}
|
}
|
||||||
|
//bugzilla 7718
|
||||||
|
string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
|
||||||
|
auto reStrCmd = regex (`(".*")|('.*')`, "g");
|
||||||
|
assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
|
||||||
|
[`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
|
||||||
}
|
}
|
||||||
test_body!bmatch();
|
test_body!bmatch();
|
||||||
test_body!match();
|
test_body!match();
|
||||||
|
@ -7502,7 +7523,11 @@ else
|
||||||
}
|
}
|
||||||
unittest
|
unittest
|
||||||
{//bugzilla 7111
|
{//bugzilla 7111
|
||||||
assert(!match("", regex("^")).empty);
|
assert(match("", regex("^")));
|
||||||
|
}
|
||||||
|
unittest
|
||||||
|
{//bugzilla 7300
|
||||||
|
assert(!match("a"d, "aa"d));
|
||||||
}
|
}
|
||||||
|
|
||||||
unittest
|
unittest
|
||||||
|
@ -7523,4 +7548,4 @@ else
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}//version(unittest)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue