Merge pull request #481 from blackwhale/regex-fix-parser

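Fix unreported bug in std.regex parser: alternation (`|`) directly inside a group whose opening instruction is longer than one word (lookaround, e.g. `(?<=\.|[!?]+)X`) placed its OrStart/Option fixup incorrectly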
This commit is contained in:
David Simcha 2012-03-15 19:13:48 -07:00
commit 8ed20feaf7

View file

@ -227,7 +227,7 @@ import core.bitop, core.stdc.string, core.stdc.stdlib;
import ascii = std.ascii; import ascii = std.ascii;
import std.string : representation; import std.string : representation;
version(unittest) debug import std.stdio; debug import std.stdio;
private: private:
@safe: @safe:
@ -1148,15 +1148,23 @@ struct Parser(R, bool CTFE=false)
put(Bytecode(IR.Option, 0)); put(Bytecode(IR.Option, 0));
break; break;
} }
uint len, orStart;
//start a new option //start a new option
if(fixupStack.length == 1)//only root entry if(fixupStack.length == 1)
fix = -1; {//only root entry, effectively no fixup
uint len = cast(uint)ir.length - fix; len = cast(uint)ir.length + IRL!(IR.GotoEndOr);
insertInPlaceAlt(ir, fix+1, Bytecode(IR.OrStart, 0), Bytecode(IR.Option, len)); orStart = 0;
assert(ir[fix+1].code == IR.OrStart); }
else
{//IR.lookahead, etc. fixups that have length > 1, thus check ir[x].length
len = cast(uint)ir.length - fix - (ir[fix].length - 1);
orStart = fix + ir[fix].length;
}
insertInPlaceAlt(ir, orStart, Bytecode(IR.OrStart, 0), Bytecode(IR.Option, len));
assert(ir[orStart].code == IR.OrStart);
put(Bytecode(IR.GotoEndOr, 0)); put(Bytecode(IR.GotoEndOr, 0));
fixupStack.push(fix+1); //fixup for StartOR fixupStack.push(orStart); //fixup for StartOR
fixupStack.push(cast(uint)ir.length); //for Option fixupStack.push(cast(uint)ir.length); //for second Option
put(Bytecode(IR.Option, 0)); put(Bytecode(IR.Option, 0));
break; break;
default://no groups or whatever default://no groups or whatever
@ -2117,7 +2125,6 @@ private:
//print out disassembly of a program's IR //print out disassembly of a program's IR
@trusted debug public void print() const @trusted debug public void print() const
{//@@@BUG@@@ write is system {//@@@BUG@@@ write is system
import std.stdio;
writefln("PC\tINST\n"); writefln("PC\tINST\n");
prettyPrint(delegate void(const(char)[] s){ write(s); },ir); prettyPrint(delegate void(const(char)[] s){ write(s); },ir);
writefln("\n"); writefln("\n");
@ -4041,7 +4048,7 @@ template BacktrackingMatcher(bool CTregex)
pc -= len; pc -= len;
assert(re.ir[pc].code == IR.Option); assert(re.ir[pc].code == IR.Option);
len = re.ir[pc].data; len = re.ir[pc].data;
auto pc_save = pc+len-1; auto pc_save = pc+len-IRL!(IR.GotoEndOr);
pc = pc + len + IRL!(IR.Option); pc = pc + len + IRL!(IR.Option);
while(re.ir[pc].code == IR.Option) while(re.ir[pc].code == IR.Option)
{ {
@ -7121,6 +7128,8 @@ unittest
TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"), TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"), TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"), TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
//mixed lookaround //mixed lookaround
TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"), TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"), TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),