fix Issue 7300 - std.regex.ShiftOr!dchar.search is broken

Rework the problematic memchr code path so that it properly tests for the end of the string. More importantly, the resulting code is cleaner overall.
This commit is contained in:
Dmitry Olshansky 2012-02-27 00:18:46 +04:00
parent ba5eecf086
commit e53e6d64e9

View file

@ -2743,7 +2743,7 @@ public:
// returns only valid UTF indexes
// (that given the haystack in question is valid UTF string)
@trusted size_t search(const(Char)[] haystack, size_t idx)
{
{//@BUG: apparently assumes little endian machines
assert(!empty);
auto p = cast(const(ubyte)*)(haystack.ptr+idx);
uint state = uint.max;
@ -2756,9 +2756,10 @@ public:
while(p != end)
{
if(!~state)
{
{//speed up seeking first matching place
for(;;)
{
assert(p <= end, text(p," vs ", end));
p = cast(ubyte*)memchr(p, fChar, end - p);
if(!p)
return haystack.length;
@ -2773,31 +2774,40 @@ public:
{
state = (state<<1) | table[p[1]];
state = (state<<1) | table[p[2]];
p += 3;
p += 4;
}
}
//first char is already tested, see if that's all
if(!(state & limit))//division rounds down for dchar
else
p++;
//first char is tested, see if that's all
if(!(state & limit))
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-length+1;
-length;
}
else
{//have some bits/states for possible matches,
//use the usual shift-or cycle
static if(charSize == 3)
{
state = (state<<1) | table[p[0]];
state = (state<<1) | table[p[1]];
state = (state<<1) | table[p[2]];
state = (state<<1) | table[p[3]];
p+=4;
}
else
{
state = (state<<1) | table[p[1]];
state = (state<<1) | table[p[0]];
p++;
}
if(!(state & limit))
return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-length;
}
debug(fred_search) writefln("State: %32b", state);
}
}
else
{
//in this path we have to shift first
//normal path, partially unrolled for char/wchar
static if(charSize == 3)
{
const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);