mirror of
https://github.com/dlang/phobos.git
synced 2025-05-01 15:40:36 +03:00
Merge pull request #462 from blackwhale/regex-fixes
fix Issue 7300 - std.regex.ShiftOr!dchar.search is broken
This commit is contained in:
commit
84a847cbe1
5 changed files with 162 additions and 98 deletions
|
@ -2,14 +2,17 @@ $(VERSION 059, ddd mm, 2012, =================================================,
|
|||
|
||||
$(LIBBUGSFIXED
|
||||
$(LI $(BUGZILLA 4604): A stack overflow with writeln)
|
||||
$(LI $(BUGZILLA 5523): std.regex handles "\s" and "\W" (etc.) inside square brackets improperly)
|
||||
$(LI $(BUGZILLA 5674): AssertError in std.regex)
|
||||
$(LI $(BUGZILLA 5652): Add \p and \P unicode properties to std.regex)
|
||||
$(LI $(BUGZILLA 5964): std.stdio.readln can throw a UnicodeException)
|
||||
$(LI $(BUGZILLA 6217): [GSOC] result of std.algorithm.map is not movable)
|
||||
$(LI $(BUGZILLA 6403): Upgrade std.regex to Unicode UTS #18 Level 1 support)
|
||||
$(LI $(BUGZILLA 7111): New regex engine cannot match beginning of empty string)
|
||||
$(LI $(BUGZILLA 7138): Can't call array() on dirEntries)
|
||||
$(LI $(BUGZILLA 7264): Can't iterate result from 4-arg dirEntries as string)
|
||||
$(LI $(BUGZILLA 7299): std.uni missing doc comments)
|
||||
$(LI $(BUGZILLA 7300): std.regex.ShiftOr!dchar.search is broken)
|
||||
$(LI $(BUGZILLA 7374): stdin.byLine() throws AssertError on empty input)
|
||||
$(LI $(BUGZILLA 7628): std.format formatValue incorrect overload)
|
||||
$(LI $(BUGZILLA 7674): regex replace requires escaped format)
|
||||
|
|
|
@ -1406,7 +1406,7 @@ unittest
|
|||
/// Ditto
|
||||
T move(T)(ref T src)
|
||||
{
|
||||
T result;
|
||||
T result=void;
|
||||
move(src, result);
|
||||
return result;
|
||||
}
|
||||
|
@ -8771,3 +8771,9 @@ unittest
|
|||
//writeln(b[0]);
|
||||
assert(b[0] == tuple(4.0, 2u));
|
||||
}
|
||||
|
||||
unittest//Issue 6217
|
||||
{
|
||||
auto x = map!"a"([1,2,3]);
|
||||
x = move(x);
|
||||
}
|
||||
|
|
|
@ -58,16 +58,6 @@ body
|
|||
}
|
||||
}
|
||||
|
||||
//ditto
|
||||
@trusted void moveAllAlt(T)(T[] src, T[] dest)
|
||||
{//moveAll is @system
|
||||
if(__ctfe)
|
||||
foreach(i,v; src)
|
||||
dest[i] = v;
|
||||
else
|
||||
moveAll(src, dest);
|
||||
}
|
||||
|
||||
//$(D Interval) represents an interval of codepoints: [a,b).
|
||||
struct Interval
|
||||
{
|
||||
|
|
56
std/range.d
56
std/range.d
|
@ -5932,6 +5932,27 @@ unittest {
|
|||
assert(equal(app.data, [1,2,3]));
|
||||
}
|
||||
|
||||
/**
|
||||
Returns true if $(D fn) accepts variables of type T1 and T2 in any order.
|
||||
The following code should compile:
|
||||
---
|
||||
T1 t1;
|
||||
T2 t2;
|
||||
fn(t1, t2);
|
||||
fn(t2, t1);
|
||||
---
|
||||
*/
|
||||
template isTwoWayCompatible(alias fn, T1, T2)
|
||||
{
|
||||
enum isTwoWayCompatible = is(typeof( (){
|
||||
T1 e;
|
||||
T2 v;
|
||||
return fn(v,e) && fn(e,v);
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Policy used with the searching primitives $(D lowerBound), $(D
|
||||
upperBound), and $(D equalRange) of $(LREF SortedRange) below.
|
||||
|
@ -6233,10 +6254,9 @@ if (isRandomAccessRange!Range)
|
|||
----
|
||||
*/
|
||||
auto lowerBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
||||
if (is(V : ElementType!Range))
|
||||
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||
{
|
||||
ElementType!Range v = value;
|
||||
return this[0 .. getTransitionIndex!(sp, geq)(v)];
|
||||
return this[0 .. getTransitionIndex!(sp, geq)(value)];
|
||||
}
|
||||
|
||||
// upperBound
|
||||
|
@ -6257,10 +6277,9 @@ if (isRandomAccessRange!Range)
|
|||
----
|
||||
*/
|
||||
auto upperBound(SearchPolicy sp = SearchPolicy.binarySearch, V)(V value)
|
||||
if (is(V : ElementType!Range))
|
||||
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||
{
|
||||
ElementType!Range v = value;
|
||||
return this[getTransitionIndex!(sp, gt)(v) .. length];
|
||||
return this[getTransitionIndex!(sp, gt)(value) .. length];
|
||||
}
|
||||
|
||||
// equalRange
|
||||
|
@ -6284,7 +6303,8 @@ if (isRandomAccessRange!Range)
|
|||
assert(equal(r, [ 3, 3, 3 ]));
|
||||
----
|
||||
*/
|
||||
auto equalRange(V)(V value) if (is(V : ElementType!Range))
|
||||
auto equalRange(V)(V value)
|
||||
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||
{
|
||||
size_t first = 0, count = _input.length;
|
||||
while (count > 0)
|
||||
|
@ -6339,7 +6359,8 @@ assert(equal(r[1], [ 3, 3, 3 ]));
|
|||
assert(equal(r[2], [ 4, 4, 5, 6 ]));
|
||||
----
|
||||
*/
|
||||
auto trisect(V)(V value) if (is(V : ElementType!Range))
|
||||
auto trisect(V)(V value)
|
||||
if (isTwoWayCompatible!(predFun, ElementType!Range, V))
|
||||
{
|
||||
size_t first = 0, count = _input.length;
|
||||
while (count > 0)
|
||||
|
@ -6445,6 +6466,19 @@ unittest
|
|||
assert(equal(r[2], [ 40, 40, 50, 60 ]));
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
auto a = [ "A", "AG", "B", "E", "F" ];
|
||||
auto r = assumeSorted!"cmp(a,b) < 0"(a).trisect("B"w);
|
||||
assert(equal(r[0], [ "A", "AG" ]));
|
||||
assert(equal(r[1], [ "B" ]));
|
||||
assert(equal(r[2], [ "E", "F" ]));
|
||||
r = assumeSorted!"cmp(a,b) < 0"(a).trisect("A"d);
|
||||
assert(r[0].empty);
|
||||
assert(equal(r[1], [ "A" ]));
|
||||
assert(equal(r[2], [ "AG", "B", "E", "F" ]));
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
static void test(SearchPolicy pol)()
|
||||
|
@ -6536,6 +6570,8 @@ unittest
|
|||
assert(equal(p, [0, 1, 2, 3, 4]));
|
||||
p = assumeSorted(a).lowerBound(6);
|
||||
assert(equal(p, [ 0, 1, 2, 3, 4, 5]));
|
||||
p = assumeSorted(a).lowerBound(6.9);
|
||||
assert(equal(p, [ 0, 1, 2, 3, 4, 5, 6]));
|
||||
}
|
||||
|
||||
unittest
|
||||
|
@ -6543,6 +6579,8 @@ unittest
|
|||
int[] a = [ 1, 2, 3, 3, 3, 4, 4, 5, 6 ];
|
||||
auto p = assumeSorted(a).upperBound(3);
|
||||
assert(equal(p, [4, 4, 5, 6 ]));
|
||||
p = assumeSorted(a).upperBound(4.2);
|
||||
assert(equal(p, [ 5, 6 ]));
|
||||
}
|
||||
|
||||
unittest
|
||||
|
@ -6558,6 +6596,8 @@ unittest
|
|||
assert(p.empty);
|
||||
p = assumeSorted(a).equalRange(7);
|
||||
assert(p.empty);
|
||||
p = assumeSorted(a).equalRange(3.0);
|
||||
assert(equal(p, [ 3, 3, 3]));
|
||||
}
|
||||
|
||||
unittest
|
||||
|
|
137
std/regex.d
137
std/regex.d
|
@ -774,22 +774,12 @@ auto memoizeExpr(string expr)()
|
|||
s.add(Interval(0,0x7f));
|
||||
else
|
||||
{
|
||||
version(fred_perfect_hashing)
|
||||
{
|
||||
uint key = phash(name);
|
||||
if(key >= PHASHNKEYS || ucmp(name,unicodeProperties[key].name) != 0)
|
||||
enforce(0, "invalid property name");
|
||||
s = cast(CodepointSet)unicodeProperties[key].set;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto range = assumeSorted!((x,y){ return ucmp(x.name, y.name) < 0; })(unicodeProperties);
|
||||
auto range = assumeSorted!((x,y) => ucmp(x.name, y.name) < 0)(unicodeProperties);
|
||||
//creating empty Codepointset is a workaround
|
||||
auto eq = range.lowerBound(UnicodeProperty(cast(string)name,CodepointSet.init)).length;
|
||||
enforce(eq!=range.length && ucmp(name,range[eq].name)==0,"invalid property name");
|
||||
s = range[eq].set.dup;
|
||||
}
|
||||
}
|
||||
|
||||
if(casefold)
|
||||
s = caseEnclose(s);
|
||||
|
@ -873,15 +863,12 @@ struct Parser(R, bool CTFE=false)
|
|||
if(isSomeString!S)
|
||||
{
|
||||
pat = origin = pattern;
|
||||
//reserve slightly more then avg as sampled from unittests
|
||||
if(!__ctfe)
|
||||
ir.reserve(pat.length);
|
||||
ir.reserve((pat.length*5+2)/4);
|
||||
parseFlags(flags);
|
||||
_current = ' ';//a safe default for freeform parsing
|
||||
next();
|
||||
if(__ctfe)
|
||||
parseRegex();
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
parseRegex();
|
||||
|
@ -890,7 +877,6 @@ struct Parser(R, bool CTFE=false)
|
|||
{
|
||||
error(e.msg);//also adds pattern location
|
||||
}
|
||||
}
|
||||
put(Bytecode(IR.End, 0));
|
||||
}
|
||||
|
||||
|
@ -911,10 +897,8 @@ struct Parser(R, bool CTFE=false)
|
|||
empty = true;
|
||||
return false;
|
||||
}
|
||||
//for CTFEability
|
||||
size_t idx=0;
|
||||
_current = decode(pat, idx);
|
||||
pat = pat[idx..$];
|
||||
_current = pat.front;
|
||||
pat.popFront();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1250,7 +1234,7 @@ struct Parser(R, bool CTFE=false)
|
|||
default:
|
||||
if(replace)
|
||||
{
|
||||
moveAllAlt(ir[offset+1..$],ir[offset..$-1]);
|
||||
moveAll(ir[offset+1..$],ir[offset..$-1]);
|
||||
ir.length -= 1;
|
||||
}
|
||||
return;
|
||||
|
@ -1298,17 +1282,10 @@ struct Parser(R, bool CTFE=false)
|
|||
counterDepth = std.algorithm.max(counterDepth, nesting+1);
|
||||
}
|
||||
else if(replace)
|
||||
{
|
||||
if(__ctfe)//CTFE workaround: no moveAll and length -= x;
|
||||
{
|
||||
ir = ir[0..offset] ~ ir[offset+1..$];
|
||||
}
|
||||
else
|
||||
{
|
||||
moveAll(ir[offset+1 .. $],ir[offset .. $-1]);
|
||||
ir.length -= 1;
|
||||
}
|
||||
}
|
||||
put(Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len));
|
||||
enforce(ir.length + len < maxCompiledLength, "maximum compiled pattern length is exceeded");
|
||||
ir ~= ir[offset .. offset+len];
|
||||
|
@ -2162,13 +2139,8 @@ private:
|
|||
//
|
||||
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
|
||||
{//equal is @system?
|
||||
//@@@BUG@@@ assumeSorted kills "-inline"
|
||||
//auto fnd = assumeSorted(map!"a.name"(dict)).lowerBound(name).length;
|
||||
uint fnd;
|
||||
for(fnd = 0; fnd<dict.length; fnd++)
|
||||
if(equal(dict[fnd].name,name))
|
||||
break;
|
||||
enforce(fnd < dict.length, text("no submatch named ", name));
|
||||
auto fnd = assumeSorted!"cmp(a,b) < 0"(map!"a.name"(dict)).lowerBound(name).length;
|
||||
enforce(equal(dict[fnd].name, name), text("no submatch named ", name));
|
||||
return dict[fnd].group;
|
||||
}
|
||||
|
||||
|
@ -2766,7 +2738,7 @@ public:
|
|||
// returns only valid UTF indexes
|
||||
// (that given the haystack in question is valid UTF string)
|
||||
@trusted size_t search(const(Char)[] haystack, size_t idx)
|
||||
{
|
||||
{//@BUG: apparently assumes little endian machines
|
||||
assert(!empty);
|
||||
auto p = cast(const(ubyte)*)(haystack.ptr+idx);
|
||||
uint state = uint.max;
|
||||
|
@ -2779,9 +2751,10 @@ public:
|
|||
while(p != end)
|
||||
{
|
||||
if(!~state)
|
||||
{
|
||||
{//speed up seeking first matching place
|
||||
for(;;)
|
||||
{
|
||||
assert(p <= end, text(p," vs ", end));
|
||||
p = cast(ubyte*)memchr(p, fChar, end - p);
|
||||
if(!p)
|
||||
return haystack.length;
|
||||
|
@ -2796,31 +2769,40 @@ public:
|
|||
{
|
||||
state = (state<<1) | table[p[1]];
|
||||
state = (state<<1) | table[p[2]];
|
||||
p += 3;
|
||||
p += 4;
|
||||
}
|
||||
}
|
||||
//first char is already tested, see if that's all
|
||||
if(!(state & limit))//division rounds down for dchar
|
||||
else
|
||||
p++;
|
||||
//first char is tested, see if that's all
|
||||
if(!(state & limit))
|
||||
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
|
||||
-length+1;
|
||||
-length;
|
||||
}
|
||||
else
|
||||
{//have some bits/states for possible matches,
|
||||
//use the usual shift-or cycle
|
||||
static if(charSize == 3)
|
||||
{
|
||||
state = (state<<1) | table[p[0]];
|
||||
state = (state<<1) | table[p[1]];
|
||||
state = (state<<1) | table[p[2]];
|
||||
state = (state<<1) | table[p[3]];
|
||||
p+=4;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = (state<<1) | table[p[1]];
|
||||
state = (state<<1) | table[p[0]];
|
||||
p++;
|
||||
}
|
||||
if(!(state & limit))
|
||||
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
|
||||
-length;
|
||||
}
|
||||
debug(fred_search) writefln("State: %32b", state);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//in this path we have to shift first
|
||||
//normal path, partially unrolled for char/wchar
|
||||
static if(charSize == 3)
|
||||
{
|
||||
const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);
|
||||
|
@ -4870,8 +4852,6 @@ enum OneShot { Fwd, Bwd };
|
|||
if(is(Char : dchar))
|
||||
{
|
||||
alias Stream.DataIndex DataIndex;
|
||||
alias const(Char)[] String;
|
||||
enum threadAllocSize = 16;
|
||||
Thread!DataIndex* freelist;
|
||||
ThreadList!DataIndex clist, nlist;
|
||||
DataIndex[] merge;
|
||||
|
@ -4978,7 +4958,6 @@ enum OneShot { Fwd, Bwd };
|
|||
writeln("------------------------------------------");
|
||||
if(exhausted)
|
||||
{
|
||||
|
||||
return false;
|
||||
}
|
||||
if(re.flags & RegexInfo.oneShot)
|
||||
|
@ -5039,8 +5018,7 @@ enum OneShot { Fwd, Bwd };
|
|||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
exhausted = true;
|
||||
|
||||
genCounter++; //increment also on each end
|
||||
debug(fred_matching) writefln("Threaded matching threads at end");
|
||||
//try out all zero-width posibilities
|
||||
|
@ -5050,8 +5028,17 @@ enum OneShot { Fwd, Bwd };
|
|||
}
|
||||
if(!matched)
|
||||
eval!false(createStart(index), matches);//new thread starting at end of input
|
||||
if(matched && !(re.flags & RegexOption.global))
|
||||
exhausted = true;
|
||||
if(matched)
|
||||
{//in case NFA found match along the way
|
||||
//and last possible longer alternative ultimately failed
|
||||
s.reset(matches[0].end);//reset to last successful match
|
||||
next();//and reload front character
|
||||
//--- here the exact state of stream was restored ---
|
||||
exhausted = atEnd || !(re.flags & RegexOption.global);
|
||||
//+ empty match advances the input
|
||||
if(!exhausted && matches[0].begin == matches[0].end)
|
||||
next();
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
|
||||
|
@ -6278,6 +6265,24 @@ public:
|
|||
@property ref captures(){ return this; }
|
||||
}
|
||||
|
||||
unittest//verify example
|
||||
{
|
||||
auto m = match("@abc#", regex(`(\w)(\w)(\w)`));
|
||||
auto c = m.captures;
|
||||
assert(c.pre == "@");// part of input preceeding match
|
||||
assert(c.post == "#"); // immediately after match
|
||||
assert(c.hit == c[0] && c.hit == "abc");// the whole match
|
||||
assert(c[2] =="b");
|
||||
assert(c.front == "abc");
|
||||
c.popFront();
|
||||
assert(c.front == "a");
|
||||
assert(c.back == "c");
|
||||
c.popBack();
|
||||
assert(c.back == "b");
|
||||
popFrontN(c, 2);
|
||||
assert(c.empty);
|
||||
}
|
||||
|
||||
/++
|
||||
A regex engine state, as returned by $(D match) family of functions.
|
||||
|
||||
|
@ -6397,9 +6402,19 @@ public:
|
|||
|
||||
Throws: $(D RegexException) if there were any errors during compilation.
|
||||
+/
|
||||
public auto regex(S)(S pattern, const(char)[] flags="")
|
||||
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
|
||||
if(isSomeString!(S))
|
||||
{
|
||||
enum cacheSize = 8; //TODO: invent nice interface to control regex caching
|
||||
if(__ctfe)
|
||||
return regexImpl(pattern, flags);
|
||||
return memoize!(regexImpl!S, cacheSize)(pattern, flags);
|
||||
}
|
||||
|
||||
public auto regexImpl(S)(S pattern, const(char)[] flags="")
|
||||
if(isSomeString!(S))
|
||||
{
|
||||
alias Regex!(BasicElementOf!S) Reg;
|
||||
if(!__ctfe)
|
||||
{
|
||||
auto parser = Parser!(Unqual!(typeof(pattern)))(pattern, flags);
|
||||
|
@ -7228,6 +7243,7 @@ unittest
|
|||
run_tests!match(); //thompson VM
|
||||
}
|
||||
}
|
||||
|
||||
version(fred_ct)
|
||||
{
|
||||
unittest
|
||||
|
@ -7424,6 +7440,11 @@ else
|
|||
if(ch != '-') //'--' is an operator
|
||||
assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
|
||||
}
|
||||
//bugzilla 7718
|
||||
string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
|
||||
auto reStrCmd = regex (`(".*")|('.*')`, "g");
|
||||
assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
|
||||
[`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
|
||||
}
|
||||
test_body!bmatch();
|
||||
test_body!match();
|
||||
|
@ -7502,7 +7523,11 @@ else
|
|||
}
|
||||
unittest
|
||||
{//bugzilla 7111
|
||||
assert(!match("", regex("^")).empty);
|
||||
assert(match("", regex("^")));
|
||||
}
|
||||
unittest
|
||||
{//bugzilla 7300
|
||||
assert(!match("a"d, "aa"d));
|
||||
}
|
||||
|
||||
unittest
|
||||
|
@ -7523,4 +7548,4 @@ else
|
|||
}
|
||||
}
|
||||
|
||||
}
|
||||
}//version(unittest)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue