phobos/std/regex/internal/thompson.d
Dmitry Olshansky 2b78074fc2 Split up the intertwined mess of std.regex.
The docs and API still stay in one file.
With time and refactoring more internals may be
exposed such as parser, each engine explicitly
and the sample generator (generator.d).

Also includes changes prompted by reviews/pulls:

Convert spaces-->tabs in makefiles.
Move things (again) to std/regex/internal.
Use new package(std.regex) feature.
Remove C-style arrays (some other pull against regex.d).
2014-09-13 13:45:46 +04:00

947 lines
32 KiB
D

//Written in the D programming language
/*
Implementation of Thompson NFA std.regex engine.
Key point is evaluation of all possible threads (state) at each step
in a breadth-first manner, thereby getting some nice properties:
- looking at each character only once
- merging of equivalent threads, that gives matching process linear time complexity
*/
module std.regex.internal.thompson;
package(std.regex):
import std.regex.internal.ir;
import std.range;
//State of one VM thread. Threads are chained into lists through the
//intrusive `next` pointer.
struct Thread(DataIndex)
{
Thread* next; //intrusive linked list
uint pc; //index into re.ir of the instruction this thread executes next
uint counter; //loop counter
uint uopCounter; //counts micro operations inside one macro instruction (e.g. BackRef)
//Declared with a single slot, but ThompsonMatcher.getThreadSize sizes each
//thread for re.ngroup slots and the matcher reaches them via matches.ptr,
//so this has to remain the last field.
Group!DataIndex[1] matches;
}
//Head-tail singly-linked list of threads, chained through Thread.next.
//The list only links nodes handed to it; it never allocates or frees them.
struct ThreadList(DataIndex)
{
    //first and last node; both are null while the list is empty
    Thread!DataIndex* tip = null, toe = null;

    //add new thread to the start of list
    void insertFront(Thread!DataIndex* t)
    {
        if(tip)
        {
            t.next = tip;
            tip = t;
        }
        else
        {
            t.next = null;
            tip = toe = t;
        }
    }

    //add new thread to the end of list
    void insertBack(Thread!DataIndex* t)
    {
        if(toe)
        {
            toe.next = t;
            toe = t;
        }
        else
            tip = toe = t;
        //t may still carry a link from wherever it lived before
        toe.next = null;
    }

    //move head element out of list; yields null when the list is empty
    Thread!DataIndex* fetch()
    {
        auto t = tip;
        if(tip is toe)
            tip = toe = null;
        else
            tip = tip.next;
        return t;
    }

    //non-destructive iteration of ThreadList
    struct ThreadRange
    {
        const(Thread!DataIndex)* ct;
        this(ThreadList tlist){ ct = tlist.tip; }
        @property bool empty(){ return ct is null; }
        @property const(Thread!DataIndex)* front(){ return ct; }
        //a plain function rather than a @property: it advances the range
        //and produces no value
        void popFront()
        {
            assert(ct);
            ct = ct.next;
        }
    }

    //true when the list holds no threads
    @property bool empty()
    {
        return tip is null;
    }

    //range over the threads currently in the list, head first
    ThreadRange opSlice()
    {
        return ThreadRange(this);
    }
}
/+
Thompson matcher does all matching in lockstep,
never looking at the same char twice
+/
@trusted struct ThompsonMatcher(Char, Stream = Input!Char)
if(is(Char : dchar))
{
alias DataIndex = Stream.DataIndex;
Thread!DataIndex* freelist; //head of the chain of unused threads (see allocate/recycle)
ThreadList!DataIndex clist, nlist; //threads run on the current char / threads collected for the next one
DataIndex[] merge; //generation stamps compared against genCounter to drop duplicate threads
Group!DataIndex[] backrefed; //groups read by Backref instructions that are not localRef
Regex!Char re; //regex program
Stream s; //input stream
dchar front; //code point last loaded by next()/search()
DataIndex index; //position the stream reported together with `front`
DataIndex genCounter; //merge trace counter, goes up on every dchar
size_t[size_t] subCounters; //a table of gen counter per sub-engine: PC -> counter
size_t threadSize; //bytes per thread, as computed by getThreadSize
bool matched; //set by finish() once a thread completes successfully
bool exhausted; //once set, match() returns false without doing any work
//true when Stream has a member named "search"; enables the search()-based path
enum kicked = __traits(hasMember, Stream, "search");
//Bytes occupied by one thread of this program. Thread declares exactly one
//Group slot, so the remaining (ngroup-1) slots are added on top; a program
//without groups gives the declared slot back.
static size_t getThreadSize(const ref Regex!Char re)
{
    enum baseSize = (Thread!DataIndex).sizeof;
    enum slotSize = (Group!DataIndex).sizeof;
    if(re.ngroup)
        return baseSize + (re.ngroup-1)*slotSize;
    return baseSize - slotSize;
}
//Bytes of external memory a matcher needs for this program:
//re.threadCount threads followed by the merge table, which
//initExternalMemory carves out as re.hotspotTableSize DataIndex entries.
static size_t initialMemory(const ref Regex!Char re)
{
    //size the table by its real element type; size_t.sizeof only gives the
    //same answer while DataIndex happens to be as wide as size_t
    return getThreadSize(re)*re.threadCount + re.hotspotTableSize*DataIndex.sizeof;
}
//true while the current position is the very first one of the input
@property bool atStart()
{
    return index == 0;
}
//true once the index sits at the stream's last index and the stream
//itself reports that it is at its end
@property bool atEnd()
{
    const atLast = index == s.lastIndex;
    return atLast && s.atEnd;
}
//Loads the next code point of the stream into `front`/`index`.
//Returns false when the stream yields nothing; `index` is then set to s.lastIndex.
bool next()
{
    if(s.nextChar(front, index))
        return true;
    index = s.lastIndex;
    return false;
}
static if(kicked)
{
    //Lets the stream search using re.kickstart, loading `front`/`index` on success.
    //Returns false when the search fails; `index` is then set to s.lastIndex.
    bool search()
    {
        if(s.search(re.kickstart, front, index))
            return true;
        index = s.lastIndex;
        return false;
    }
}
//Splits the supplied chunk into the thread free list and, when the program
//has a hotspot table, a zero-filled merge table placed right after it.
void initExternalMemory(void[] memory)
{
    threadSize = getThreadSize(re);
    //takes `memory` by ref and advances it past the thread region
    prepareFreeList(re.threadCount, memory);
    if(!re.hotspotTableSize)
        return;
    merge = arrayInChunk!(DataIndex)(re.hotspotTableSize, memory);
    merge[] = 0;
}
//Builds a matcher for `program` reading from `stream`; threads and the
//merge table are placed in `memory`.
this()(Regex!Char program, Stream stream, void[] memory)
{
    s = stream;
    re = program;
    genCounter = 0;
    //reads `re`, so it has to come after the assignment above
    initExternalMemory(memory);
}
//Builds a sub-matcher that runs `piece` over `stream` while sharing the
//parent's free list and merge table and starting from the parent's
//current character.
this(S)(ref ThompsonMatcher!(Char,S) matcher, Bytecode[] piece, Stream stream)
{
    //program: a copy of the parent's, narrowed to the given bytecode
    re = matcher.re;
    re.ir = piece;
    //input position
    s = stream;
    front = matcher.front;
    index = matcher.index;
    //memory shared with the parent
    freelist = matcher.freelist;
    merge = matcher.merge;
    threadSize = matcher.threadSize;
}
//Sub-matcher over the same stream type that runs `piece`, with its
//generation counter preset to `counter`.
auto fwdMatcher()(Bytecode[] piece, size_t counter)
{
    alias Sub = ThompsonMatcher!(Char, Stream);
    auto sub = Sub(this, piece, s);
    sub.genCounter = counter;
    return sub;
}
//Sub-matcher that runs `piece` over s.loopBack(index), with its generation
//counter preset to `counter` and its first character already loaded.
auto bwdMatcher()(Bytecode[] piece, size_t counter)
{
    auto reversed = s.loopBack(index);
    alias Sub = ThompsonMatcher!(Char, typeof(reversed));
    auto sub = Sub(this, piece, reversed);
    sub.genCounter = counter;
    sub.next();
    return sub;
}
//Copy of this matcher whose free list and merge table live in `memory`
//and whose generation counter starts over at zero.
auto dupTo(void[] memory)
{
    auto copy = this; //bitblit
    copy.genCounter = 0;
    copy.initExternalMemory(memory);
    return copy;
}
//Outcome reported by matchOneShot
enum MatchResult{
NoMatch, //finished without `matched` being set
PartialMatch, //next() failed while atEnd was still false
Match, //finished with `matched` set
}
//Looks for the next match and stores its groups in `matches`.
//Returns false when nothing matched or the matcher is already exhausted.
//Programs flagged oneShot get a single matchOneShot attempt; all others go
//through matchImpl, using the search variant when a kickstart is available.
bool match(Group!DataIndex[] matches)
{
    debug(std_regex_matcher)
        writeln("------------------------------------------");
    if(exhausted)
        return false;
    if(re.flags & RegexInfo.oneShot)
    {
        next();
        exhausted = true;
        return matchOneShot(matches) == MatchResult.Match;
    }
    static if(kicked)
    {
        if(!re.kickstart.empty)
            return matchImpl!(true)(matches);
    }
    return matchImpl!(false)(matches);
}
//match the input and fill matches
//withSearch: use search() instead of next() whenever no threads are alive
//Returns true when a match was found; `matches` then holds its groups.
bool matchImpl(bool withSearch)(Group!DataIndex[] matches)
{
//load the first character unless the previous call already left one in `front`
if(!matched && clist.empty)
{
static if(withSearch)
search();
else
next();
}
else//char in question is fetched in prev call to match
{
matched = false;
}
if(!atEnd)//the main loop runs only while there is a char to consume
for(;;)
{
genCounter++;
debug(std_regex_matcher)
{
writefln("Threaded matching threads at %s", s[index..s.lastIndex]);
foreach(t; clist[])
{
assert(t);
writef("pc=%s ",t.pc);
write(t.matches);
writeln();
}
}
//run every thread of the current generation against `front`
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
{
eval!true(t, matches);
}
if(!matched)//if we already have match no need to push the engine
eval!true(createStart(index), matches);//new thread starting at this position
else if(nlist.empty)
{
debug(std_regex_matcher) writeln("Stopped matching before consuming full input");
break;//not a partial match for sure
}
//threads collected for the next char become the current generation
clist = nlist;
nlist = (ThreadList!DataIndex).init;
if(clist.tip is null)
{
//no live threads: fetch the next char, via search() when enabled
static if(withSearch)
{
if(!search())
break;
}
else
{
if(!next())
break;
}
}
else if(!next())
{
//next() failed: give up unless the input really is at its end
if (!atEnd) return false;
exhausted = true;
break;
}
}
genCounter++; //increment also on each end
debug(std_regex_matcher) writefln("Threaded matching threads at end");
//try out all zero-width possibilities
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
{
eval!false(t, matches);
}
if(!matched)
eval!false(createStart(index), matches);//new thread starting at end of input
if(matched)
{//in case NFA found match along the way
//and last possible longer alternative ultimately failed
s.reset(matches[0].end);//reset to last successful match
next();//and reload front character
//--- here the exact state of stream was restored ---
exhausted = atEnd || !(re.flags & RegexOption.global);
//+ empty match advances the input
if(!exhausted && matches[0].begin == matches[0].end)
next();
}
return matched;
}
/+
    Handles a successful thread: copies its re.ngroup groups into `matches`
    and raises the `matched` flag.
+/
void finish(const(Thread!DataIndex)* t, Group!DataIndex[] matches)
{
    const ngroup = re.ngroup;
    matches.ptr[0..ngroup] = t.matches.ptr[0..ngroup];
    matched = true;
    debug(std_regex_matcher)
    {
        writef("FOUND pc=%s prog_len=%s",
            t.pc, re.ir.length);
        if(!matches.empty)
            writefln(": %s..%s", matches[0].begin, matches[0].end);
        foreach(v; matches)
            writefln("%d .. %d", v.begin, v.end);
    }
}
/+
    match thread against codepoint, cutting through all 0-width instructions
    and taking care of control flow, then add it to nlist

    Params:
        withInput = true when `front` holds a character to consume; when false
                    the character-consuming instructions just drop the thread
        t         = thread to run; eval takes it over and either appends it to
                    nlist or recycles it
        matches   = receives the groups of a thread that reaches End or the
                    end of a lookaround

    Threads forked on the way are kept on a local worklist and are run,
    most recently queued first, before eval returns.
+/
void eval(bool withInput)(Thread!DataIndex* t, Group!DataIndex[] matches)
{
    ThreadList!DataIndex worklist;
    debug(std_regex_matcher) writeln("---- Evaluating thread");
    for(;;)
    {
        debug(std_regex_matcher)
        {
            writef("\tpc=%s [", t.pc);
            foreach(x; worklist[])
                writef(" %s ", x.pc);
            writeln("]");
        }
        switch(re.ir[t.pc].code)
        {
        case IR.End:
            finish(t, matches);
            matches[0].end = index; //fix endpoint of the whole match
            recycle(t);
            //cut off low priority threads
            recycle(clist);
            recycle(worklist);
            debug(std_regex_matcher) writeln("Finished thread ", matches);
            return;
        case IR.Wordboundary:
            dchar back;
            DataIndex bi;
            //at start & end of input
            if(atStart && wordTrie[front])
            {
                t.pc += IRL!(IR.Wordboundary);
                break;
            }
            else if(atEnd && s.loopBack(index).nextChar(back, bi)
                    && wordTrie[back])
            {
                t.pc += IRL!(IR.Wordboundary);
                break;
            }
            else if(s.loopBack(index).nextChar(back, bi))
            {
                bool af = wordTrie[front];
                bool ab = wordTrie[back];
                if(af ^ ab)
                {
                    t.pc += IRL!(IR.Wordboundary);
                    break;
                }
            }
            //not a boundary: drop this thread and resume a queued one
            recycle(t);
            t = worklist.fetch();
            if(!t)
                return;
            break;
        case IR.Notwordboundary:
            dchar back;
            DataIndex bi;
            //at start & end of input
            if(atStart && wordTrie[front])
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            else if(atEnd && s.loopBack(index).nextChar(back, bi)
                    && wordTrie[back])
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            else if(s.loopBack(index).nextChar(back, bi))
            {
                bool af = wordTrie[front];
                bool ab = wordTrie[back] != 0;
                if(af ^ ab)
                {
                    recycle(t);
                    t = worklist.fetch();
                    if(!t)
                        return;
                    break;
                }
            }
            //advance by the length of the instruction actually executed
            t.pc += IRL!(IR.Notwordboundary);
            break;
        case IR.Bol:
            dchar back;
            DataIndex bi;
            if(atStart
                ||( (re.flags & RegexOption.multiline)
                && s.loopBack(index).nextChar(back,bi)
                && startOfLine(back, front == '\n')))
            {
                t.pc += IRL!(IR.Bol);
            }
            else
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
            }
            break;
        case IR.Eol:
            debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index..s.lastIndex]);
            dchar back;
            DataIndex bi;
            //no matching inside \r\n
            if(atEnd || ((re.flags & RegexOption.multiline)
                && endOfLine(front, s.loopBack(index).nextChar(back, bi)
                    && back == '\r')))
            {
                t.pc += IRL!(IR.Eol);
            }
            else
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
            }
            break;
        case IR.InfiniteStart, IR.InfiniteQStart:
            //jump to the matching end instruction and let it decide
            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
            goto case IR.InfiniteEnd; //both Q and non-Q
        case IR.RepeatStart, IR.RepeatQStart:
            t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
            goto case IR.RepeatEnd; //both Q and non-Q
        case IR.RepeatEnd:
        case IR.RepeatQEnd:
            //len, step, min, max
            uint len = re.ir[t.pc].data;
            uint step = re.ir[t.pc+2].raw;
            uint min = re.ir[t.pc+3].raw;
            if(t.counter < min)
            {
                t.counter += step;
                t.pc -= len;
                break;
            }
            //merge check: only the first thread to get here in this generation survives
            if(merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            uint max = re.ir[t.pc+4].raw;
            if(t.counter < max)
            {
                if(re.ir[t.pc].code == IR.RepeatEnd)
                {
                    //queue out-of-loop thread
                    worklist.insertFront(fork(t, t.pc + IRL!(IR.RepeatEnd), t.counter % step));
                    t.counter += step;
                    t.pc -= len;
                }
                else
                {
                    //queue into-loop thread
                    worklist.insertFront(fork(t, t.pc - len, t.counter + step));
                    t.counter %= step;
                    t.pc += IRL!(IR.RepeatEnd);
                }
            }
            else
            {
                t.counter %= step;
                t.pc += IRL!(IR.RepeatEnd);
            }
            break;
        case IR.InfiniteEnd:
        case IR.InfiniteQEnd:
            if(merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            uint len = re.ir[t.pc].data;
            uint pc1, pc2; //branches to take in priority order
            if(re.ir[t.pc].code == IR.InfiniteEnd)
            {
                pc1 = t.pc - len;
                pc2 = t.pc + IRL!(IR.InfiniteEnd);
            }
            else
            {
                pc1 = t.pc + IRL!(IR.InfiniteEnd);
                pc2 = t.pc - len;
            }
            static if(withInput)
            {
                //fork only when the quick test does not rule out the preferred branch
                int test = quickTestFwd(pc1, front, re);
                if(test >= 0)
                {
                    worklist.insertFront(fork(t, pc2, t.counter));
                    t.pc = pc1;
                }
                else
                    t.pc = pc2;
            }
            else
            {
                worklist.insertFront(fork(t, pc2, t.counter));
                t.pc = pc1;
            }
            break;
        case IR.OrEnd:
            if(merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
                t.pc += IRL!(IR.OrEnd);
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
            }
            break;
        case IR.OrStart:
            t.pc += IRL!(IR.OrStart);
            goto case;
        case IR.Option:
            uint next = t.pc + re.ir[t.pc].data + IRL!(IR.Option);
            //queue next Option
            if(re.ir[next].code == IR.Option)
            {
                worklist.insertFront(fork(t, next, t.counter));
            }
            t.pc += IRL!(IR.Option);
            break;
        case IR.GotoEndOr:
            t.pc = t.pc + re.ir[t.pc].data + IRL!(IR.GotoEndOr);
            goto case IR.OrEnd;
        case IR.GroupStart:
            uint n = re.ir[t.pc].data;
            t.matches.ptr[n].begin = index;
            t.pc += IRL!(IR.GroupStart);
            break;
        case IR.GroupEnd:
            uint n = re.ir[t.pc].data;
            t.matches.ptr[n].end = index;
            t.pc += IRL!(IR.GroupEnd);
            break;
        case IR.Backref:
            uint n = re.ir[t.pc].data;
            Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
            assert(source);
            if(source[n].begin == source[n].end)//zero-width Backref!
            {
                t.pc += IRL!(IR.Backref);
            }
            else static if(withInput)
            {
                //compare one code point per step; uopCounter is the offset into the group
                size_t idx = source[n].begin + t.uopCounter;
                size_t end = source[n].end;
                if(s[idx..end].front == front)
                {
                    t.uopCounter += std.utf.stride(s[idx..end], 0);
                    if(t.uopCounter + source[n].begin == source[n].end)
                    {//last codepoint
                        t.pc += IRL!(IR.Backref);
                        t.uopCounter = 0;
                    }
                    nlist.insertBack(t);
                }
                else
                    recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            else
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            break;
        case IR.LookbehindStart:
        case IR.NeglookbehindStart:
            uint len = re.ir[t.pc].data;
            uint ms = re.ir[t.pc + 1].raw, me = re.ir[t.pc + 2].raw;
            uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
            bool positive = re.ir[t.pc].code == IR.LookbehindStart;
            //a lookbehind runs against the direction of the current stream
            static if(Stream.isLoopback)
                auto matcher = fwdMatcher(re.ir[t.pc .. end], subCounters.get(t.pc, 0));
            else
                auto matcher = bwdMatcher(re.ir[t.pc .. end], subCounters.get(t.pc, 0));
            matcher.re.ngroup = me - ms;
            matcher.backrefed = backrefed.empty ? t.matches : backrefed;
            //backMatch
            auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
            //the sub-matcher drew from the shared free list; take its head back
            freelist = matcher.freelist;
            subCounters[t.pc] = matcher.genCounter;
            if((mRes == MatchResult.Match) ^ positive)
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            else
                t.pc = end;
            break;
        case IR.LookaheadStart:
        case IR.NeglookaheadStart:
            uint len = re.ir[t.pc].data;
            uint ms = re.ir[t.pc+1].raw, me = re.ir[t.pc+2].raw;
            uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
            bool positive = re.ir[t.pc].code == IR.LookaheadStart;
            //a lookahead runs along the direction of the current stream
            static if(Stream.isLoopback)
                auto matcher = bwdMatcher(re.ir[t.pc .. end], subCounters.get(t.pc, 0));
            else
                auto matcher = fwdMatcher(re.ir[t.pc .. end], subCounters.get(t.pc, 0));
            matcher.re.ngroup = me - ms;
            matcher.backrefed = backrefed.empty ? t.matches : backrefed;
            auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
            //the sub-matcher drew from the shared free list; take its head back
            freelist = matcher.freelist;
            subCounters[t.pc] = matcher.genCounter;
            //re-read the current character
            s.reset(index);
            next();
            if((mRes == MatchResult.Match) ^ positive)
            {
                recycle(t);
                t = worklist.fetch();
                if(!t)
                    return;
                break;
            }
            else
                t.pc = end;
            break;
        case IR.LookaheadEnd:
        case IR.NeglookaheadEnd:
        case IR.LookbehindEnd:
        case IR.NeglookbehindEnd:
            finish(t, matches.ptr[0 .. re.ngroup]);
            recycle(t);
            //cut off low priority threads
            recycle(clist);
            recycle(worklist);
            return;
        case IR.Nop:
            t.pc += IRL!(IR.Nop);
            break;
        static if(withInput)
        {
        case IR.OrChar:
            uint len = re.ir[t.pc].sequence;
            uint end = t.pc + len;
            static assert(IRL!(IR.OrChar) == 1);
            for(; t.pc < end; t.pc++)
                if(re.ir[t.pc].data == front)
                    break;
            if(t.pc != end)
            {
                t.pc = end;
                nlist.insertBack(t);
            }
            else
                recycle(t);
            t = worklist.fetch();
            if(!t)
                return;
            break;
        case IR.Char:
            if(front == re.ir[t.pc].data)
            {
                t.pc += IRL!(IR.Char);
                nlist.insertBack(t);
            }
            else
                recycle(t);
            t = worklist.fetch();
            if(!t)
                return;
            break;
        case IR.Any:
            t.pc += IRL!(IR.Any);
            if(!(re.flags & RegexOption.singleline)
                    && (front == '\r' || front == '\n'))
                recycle(t);
            else
                nlist.insertBack(t);
            t = worklist.fetch();
            if(!t)
                return;
            break;
        case IR.CodepointSet:
            if(re.charsets[re.ir[t.pc].data].scanFor(front))
            {
                t.pc += IRL!(IR.CodepointSet);
                nlist.insertBack(t);
            }
            else
            {
                recycle(t);
            }
            t = worklist.fetch();
            if(!t)
                return;
            break;
        case IR.Trie:
            if(re.tries[re.ir[t.pc].data][front])
            {
                t.pc += IRL!(IR.Trie);
                nlist.insertBack(t);
            }
            else
            {
                recycle(t);
            }
            t = worklist.fetch();
            if(!t)
                return;
            break;
        default:
            assert(0, "Unrecognized instruction " ~ re.ir[t.pc].mnemonic);
        }
        else
        {
        default:
            //no input left: anything not handled above drops the thread
            recycle(t);
            t = worklist.fetch();
            if(!t)
                return;
        }
        }
    }
}
//Sentinel start pc for matchOneShot: with it the initial thread is not
//created and the thread lists are allowed to be non-empty on entry.
enum uint RestartPc = uint.max;
//match the input, evaluating IR without searching
//matches: receives the groups of a successful thread
//startPc: pc of the initial thread; RestartPc skips creating it
//Returns Match or NoMatch depending on `matched`, or PartialMatch when
//next() fails while atEnd is still false.
MatchResult matchOneShot(Group!DataIndex[] matches, uint startPc = 0)
{
debug(std_regex_matcher)
{
writefln("---------------single shot match ----------------- ");
}
alias evalFn = eval;
assert(clist == (ThreadList!DataIndex).init || startPc == RestartPc); // incorrect after a partial match
assert(nlist == (ThreadList!DataIndex).init || startPc == RestartPc);
if(!atEnd)//the main loop runs only while there is a char to consume
{
debug(std_regex_matcher)
{
writefln("-- Threaded matching threads at %s", s[index..s.lastIndex]);
}
if(startPc!=RestartPc)
{
//the only start thread for the consuming phase: there is no per-char restart here
auto startT = createStart(index, startPc);
genCounter++;
evalFn!true(startT, matches);
}
for(;;)
{
debug(std_regex_matcher) writeln("\n-- Started iteration of main cycle");
genCounter++;
debug(std_regex_matcher)
{
foreach(t; clist[])
{
assert(t);
}
}
//run every thread of the current generation against `front`
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
{
evalFn!true(t, matches);
}
if(nlist.empty)
{
debug(std_regex_matcher) writeln("Stopped matching before consuming full input");
break;//not a partial match for sure
}
//threads collected for the next char become the current generation
clist = nlist;
nlist = (ThreadList!DataIndex).init;
if(!next())
{
if (!atEnd) return MatchResult.PartialMatch;
break;
}
debug(std_regex_matcher) writeln("-- Ended iteration of main cycle\n");
}
}
genCounter++; //increment also on each end
debug(std_regex_matcher) writefln("-- Matching threads at end");
//try out all zero-width possibilities
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
{
evalFn!false(t, matches);
}
//NOTE(review): with startPc == RestartPc this creates a thread at pc uint.max,
//which eval would use to index re.ir - confirm callers never reach this path
if(!matched)
evalFn!false(createStart(index, startPc), matches);
return (matched?MatchResult.Match:MatchResult.NoMatch);
}
//Takes a thread off the free list. Its fields still hold whatever the
//previous user left there, so the caller has to initialize them.
Thread!DataIndex* allocate()
{
    assert(freelist, "not enough preallocated memory");
    auto fresh = freelist;
    freelist = fresh.next;
    return fresh;
}
//Links the first threadSize*size bytes of `memory` into a free list of
//`size` threads and advances `memory` past that region.
//With size == 0 the free list is left empty and `memory` is untouched.
void prepareFreeList(size_t size, ref void[] memory)
{
    if(size == 0)
    {
        //nothing to link; indexing the empty region below would be out of bounds
        freelist = null;
        return;
    }
    immutable total = threadSize*size;
    void[] mem = memory[0 .. total];
    memory = memory[total .. $];
    freelist = cast(Thread!DataIndex*)&mem[0];
    //chain every thread to the one stored right behind it
    size_t i;
    for(i = threadSize; i < total; i += threadSize)
        (cast(Thread!DataIndex*)&mem[i-threadSize]).next = cast(Thread!DataIndex*)&mem[i];
    //the last thread terminates the list
    (cast(Thread!DataIndex*)&mem[i-threadSize]).next = null;
}
//Returns a single thread to the head of the free list.
void recycle(Thread!DataIndex* t)
{
    t.next = freelist;
    freelist = t;
}
//Returns every thread of `list` to the free list and leaves `list` empty.
void recycle(ref ThreadList!DataIndex list)
{
    for(auto cur = list.tip; cur; )
    {
        //remember the successor first: recycling rewrites cur.next
        auto following = cur.next;
        recycle(cur);
        cur = following;
    }
    list = list.init;
}
//Allocates a thread that starts at `pc` with the given loop counter and a
//copy of the master's re.ngroup groups. Its `next` link is left unset.
Thread!DataIndex* fork(Thread!DataIndex* master, uint pc, uint counter)
{
    auto child = allocate();
    child.pc = pc;
    child.counter = counter;
    child.uopCounter = 0;
    const ngroup = re.ngroup;
    child.matches.ptr[0..ngroup] = master.matches.ptr[0..ngroup];
    return child;
}
//Allocates a thread positioned at `pc` with zeroed counters and all
//re.ngroup groups reset; group 0 begins at `index`.
Thread!DataIndex* createStart(DataIndex index, uint pc = 0)
{
    auto starter = allocate();
    starter.pc = pc;
    starter.counter = 0;
    starter.uopCounter = 0;
    starter.matches.ptr[0..re.ngroup] = (Group!DataIndex).init;
    starter.matches[0].begin = index;
    return starter;
}
}