fix Issue 7300 - std.regex.ShiftOr!dchar.search is broken

Rework the problematic memchr code path so that it properly tests for the end of the string. More importantly, the code is cleaner overall.
This commit is contained in:
Dmitry Olshansky 2012-02-27 00:18:46 +04:00
parent ba5eecf086
commit e53e6d64e9

View file

@ -2743,7 +2743,7 @@ public:
// returns only valid UTF indexes // returns only valid UTF indexes
// (that given the haystack in question is valid UTF string) // (that given the haystack in question is valid UTF string)
@trusted size_t search(const(Char)[] haystack, size_t idx) @trusted size_t search(const(Char)[] haystack, size_t idx)
{ {//@BUG: apparently assumes little endian machines
assert(!empty); assert(!empty);
auto p = cast(const(ubyte)*)(haystack.ptr+idx); auto p = cast(const(ubyte)*)(haystack.ptr+idx);
uint state = uint.max; uint state = uint.max;
@ -2756,9 +2756,10 @@ public:
while(p != end) while(p != end)
{ {
if(!~state) if(!~state)
{ {//speed up seeking first matching place
for(;;) for(;;)
{ {
assert(p <= end, text(p," vs ", end));
p = cast(ubyte*)memchr(p, fChar, end - p); p = cast(ubyte*)memchr(p, fChar, end - p);
if(!p) if(!p)
return haystack.length; return haystack.length;
@ -2773,31 +2774,40 @@ public:
{ {
state = (state<<1) | table[p[1]]; state = (state<<1) | table[p[1]];
state = (state<<1) | table[p[2]]; state = (state<<1) | table[p[2]];
p += 3; p += 4;
} }
} else
//first char is already tested, see if that's all p++;
if(!(state & limit))//division rounds down for dchar //first char is tested, see if that's all
if(!(state & limit))
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-length+1; -length;
}
else
{//have some bits/states for possible matches,
//use the usual shift-or cycle
static if(charSize == 3) static if(charSize == 3)
{ {
state = (state<<1) | table[p[0]];
state = (state<<1) | table[p[1]]; state = (state<<1) | table[p[1]];
state = (state<<1) | table[p[2]]; state = (state<<1) | table[p[2]];
state = (state<<1) | table[p[3]];
p+=4; p+=4;
} }
else else
{ {
state = (state<<1) | table[p[1]]; state = (state<<1) | table[p[0]];
p++; p++;
} }
if(!(state & limit))
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-length;
}
debug(fred_search) writefln("State: %32b", state); debug(fred_search) writefln("State: %32b", state);
} }
} }
else else
{ {
//in this path we have to shift first //normal path, partially unrolled for char/wchar
static if(charSize == 3) static if(charSize == 3)
{ {
const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length); const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);