//Written in the D programming language
/*
    Regular expression pattern parser.
*/
module std.regex.internal.parser;

import std.regex.internal.ir;
import std.algorithm, std.range, std.uni, std.meta,
    std.traits, std.typecons, std.exception;
static import std.ascii;

/++
    Package the relevant results of a finished parser into a regex object.

    Copies the bytecode, named-group dictionary, group count, counter depth,
    flags, charsets, matchers and the backreference bitmap out of `p`,
    then runs `lightPostprocess` on the result. In builds with asserts
    enabled the result is additionally passed through `validateRe`.

    Params:
        p = parser whose fields are copied into the regex
    Returns: the populated `Regex!(BasicElementOf!S)`
+/
auto makeRegex(S)(Parser!S p)
{
    Regex!(BasicElementOf!S) re;
    with(re)
    {
        ir = p.ir;
        dict = p.dict;
        ngroup = p.groupStack.top;
        maxCounterDepth = p.counterDepth;
        flags = p.re_flags;
        charsets = p.charsets;
        matchers = p.matchers;
        backrefed = p.backrefed;
        re.lightPostprocess();
        debug(std_regex_parser)
        {
            print();
        }
        //@@@BUG@@@ (not reduced)
        //somehow just using validate _collides_ with std.utf.validate (!)
        version(assert) re.validateRe();
    }
    return re;
}

// helper for unittest: parse `arg` with an empty flag string and build a regex
auto makeRegex(S)(S arg)
    if(isSomeString!S)
{
    return makeRegex(Parser!S(arg, ""));
}

// namedCaptures must be a random-access range over the group names
unittest
{
    auto re = makeRegex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));

    // unnamed groups do not show up among named captures
    re = makeRegex(`(\w+) (?P<named>\w+) (\w+)`);
    nc = re.namedCaptures;
    assert(nc.length == 1);
    assert(nc[0] == "named");
    assert(nc.front == "named");
    assert(nc.back == "named");

    re = makeRegex(`(\w+) (\w+)`);
    nc = re.namedCaptures;
    assert(nc.empty);

    // save/popFront/popBack must agree with slicing of the saved copy
    re = makeRegex(`(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})/`);
    nc = re.namedCaptures;
    auto cp = nc.save;
    assert(nc.equal(cp));
    nc.popFront();
    assert(nc.equal(cp[1..$]));
    nc.popBack();
    assert(nc.equal(cp[1 .. $ - 1]));
}

/++
    Rewrite `code` in place with its instructions laid out in reverse order.

    The result is assembled back-to-front in a scratch array `rev` (`revPc`
    moves from the end towards the start) and copied over `code` at the end.
    Atoms are copied as-is; for paired start/end instructions the closing
    instruction is emitted first. Embedded lookbehind blocks are copied
    verbatim. Branches of an alternation are queued on `stack` as
    (start, end, revPc) triples and processed by later iterations of the
    outer loop.

    NOTE(review): an instruction that is neither an atom, a start/end
    instruction nor `IR.GotoEndOr` does not advance `pc` in the inner loop;
    presumably such instructions cannot occur here - confirm against the IR.
+/
@trusted void reverseBytecode()(Bytecode[] code)
{
    Bytecode[] rev = new Bytecode[code.length];
    uint revPc = cast(uint)rev.length;
    Stack!(Tuple!(uint, uint, uint)) stack;
    uint start = 0;
    uint end = cast(uint)code.length;
    for(;;)
    {
        for(uint pc = start; pc < end; )
        {
            uint len = code[pc].length;
            if(code[pc].code == IR.GotoEndOr)
                break; //pick next alternation branch
            if(code[pc].isAtom)
            {
                rev[revPc - len .. revPc] = code[pc .. pc + len];
                revPc -= len;
                pc += len;
            }
            else if(code[pc].isStart || code[pc].isEnd)
            {
                //skip over other embedded lookbehinds they are reversed
                if(code[pc].code == IR.LookbehindStart
                    || code[pc].code == IR.NeglookbehindStart)
                {
                    uint blockLen = len + code[pc].data
                         + code[pc].pairedLength;
                    rev[revPc - blockLen .. revPc] = code[pc .. pc + blockLen];
                    pc += blockLen;
                    revPc -= blockLen;
                    continue;
                }
                //emit the paired (closing) instruction first
                uint second = code[pc].indexOfPair(pc);
                uint secLen = code[second].length;
                rev[revPc - secLen .. revPc] = code[second .. second + secLen];
                revPc -= secLen;
                if(code[pc].code == IR.OrStart)
                {
                    //we pass len bytes forward, but secLen in reverse
                    uint revStart = revPc - (second + len - secLen - pc);
                    uint r = revStart;
                    uint i = pc + IRL!(IR.OrStart);
                    while(code[i].code == IR.Option)
                    {
                        if(code[i - 1].code != IR.OrStart)
                        {
                            assert(code[i - 1].code == IR.GotoEndOr);
                            rev[r - 1] = code[i - 1];
                        }
                        rev[r] = code[i];
                        auto newStart = i + IRL!(IR.Option);
                        auto newEnd = newStart + code[i].data;
                        auto newRpc = r + code[i].data + IRL!(IR.Option);
                        if(code[newEnd].code != IR.OrEnd)
                        {
                            newRpc--;
                        }
                        //branch body is reversed by a later outer iteration
                        stack.push(tuple(newStart, newEnd, newRpc));
                        r += code[i].data + IRL!(IR.Option);
                        i += code[i].data + IRL!(IR.Option);
                    }
                    pc = i;
                    revPc = revStart;
                    assert(code[pc].code == IR.OrEnd);
                }
                else
                    pc += len;
            }
        }
        if(stack.empty)
            break;
        start = stack.top[0];
        end = stack.top[1];
        revPc = stack.top[2];
        stack.pop();
    }
    code[] = rev[];
}

// characters listed as escapable
alias Escapables = AliasSeq!('[', ']', '\\', '^', '$', '.', '|', '?', ',', '-',
    ';', ':', '#', '&', '%', '/', '<', '>', '`', '*', '+', '(', ')', '{', '}', '~');

/++
    Test if a given string starts with a hex number of exactly `maxDigit`
    digits that is a valid codepoint.

    On success returns its value and advances `str` past these `maxDigit`
    chars; on failure throws and leaves `str` untouched.

    Params:
        str = input; its first `maxDigit` code units are read as ASCII hex
        maxDigit = number of hex digits to consume
    Returns: the decoded codepoint (at most 0x10FFFF)
    Throws: `Exception` with message "incomplete escape sequence" if `str`
        is shorter than `maxDigit`, "invalid escape sequence" if any of the
        digits is not a hex digit, "invalid codepoint" if the value exceeds
        0x10FFFF.
+/
dchar parseUniHex(Char)(ref Char[] str, size_t maxDigit)
{
    //std.conv.parse is both @system and bogus
    enforce(str.length >= maxDigit,"incomplete escape sequence");
    enum uint maxCodepoint = 0x10FFFF;
    uint val;
    foreach(k; 0 .. maxDigit)
    {
        auto current = str[k];//accepts ascii only, so it's OK to index directly
        uint digit;
        if('0' <= current && current <= '9')
            digit = current - '0';
        else if('a' <= current && current <= 'f')
            digit = current - 'a' + 10;
        else if('A' <= current && current <= 'F')
            digit = current - 'A' + 10;
        else
            throw new Exception("invalid escape sequence");
        val = val * 16 + digit;
        //saturate just above the valid range: uint arithmetic wraps silently,
        //so with more than 8 digits an out-of-range value could otherwise
        //wrap around and pass the range check below
        if(val > maxCodepoint)
            val = maxCodepoint + 1;
    }
    enforce(val <= maxCodepoint, "invalid codepoint");
    str = str[maxDigit..$];
    return cast(dchar)val;
}

@system unittest //BUG canFind is system
{
    string[] non_hex = [ "000j", "000z", "FffG", "0Z"];
    string[] hex = [ "01", "ff", "00af", "10FFFF" ];
    int[] value = [ 1, 0xFF, 0xAF, 0x10FFFF ];
    foreach(v; non_hex)
        assert(collectException(parseUniHex(v, v.length)).msg
          .canFind("invalid escape sequence"));
    foreach(i, v; hex)
        assert(parseUniHex(v, v.length) == value[i]);
    string over = "0011FFFF";
    assert(collectException(parseUniHex(over, over.length)).msg
      .canFind("invalid codepoint"));
    //more than 8 digits must not wrap around into the valid range
    string wrapped = "100000000";
    assert(collectException(parseUniHex(wrapped, wrapped.length)).msg
      .canFind("invalid codepoint"));
}

/++
    Extend `set` with the simple case foldings of each of its codepoints
    that belongs to `unicode.LC`, and return the extended set.
+/
auto caseEnclose(CodepointSet set)
{
    //iterate a separate set so that `set` can be grown inside the loop
    auto cased = set & unicode.LC;
    foreach (dchar ch; cased.byCodepoint)
    {
        foreach(c; simpleCaseFoldings(ch))
            set |= c;
    }
    return set;
}

/+
    fetch codepoint set corresponding to a name (InBlock or binary property)

    Case folding (if requested) is applied before negation.
+/
@trusted CodepointSet getUnicodeSet(in char[] name, bool negated,  bool casefold)
{
    CodepointSet s = unicode(name);
    //FIXME: caseEnclose for new uni as Set | CaseEnclose(SET && LC)
    if(casefold)
       s = caseEnclose(s);
    if(negated)
        s = s.inverted;
    return s;
}

//basic stack backed by a dynamic array,
//just in case it gets used anywhere else than Parser
@trusted struct Stack(T)
{
    T[] data;
    @property bool empty(){ return data.empty; }

    @property size_t length(){ return data.length; }

    void push(T val){ data ~= val;  }

    //remove and return the last pushed element; the stack must not be empty
    T pop()
    {
        assert(!empty);
        auto val = data[$ - 1];
        data = data[0 .. $ - 1];
        //lets the next push append in place (not available during CTFE)
        if(!__ctfe)
            cast(void)data.assumeSafeAppend();
        return val;
    }

    //reference to the last pushed element; the stack must not be empty
    @property ref T top()
    {
        assert(!empty);
        return data[$ - 1];
    }
}

//safety limits
enum maxGroupNumber = 2^^19;
enum maxLookaroundDepth = 16;
// *Bytecode.sizeof, i.e.
1Mb of bytecode alone enum maxCompiledLength = 2^^18; //amounts to up to 4 Mb of auxilary table for matching enum maxCumulativeRepetitionLength = 2^^20; struct Parser(R) if (isForwardRange!R && is(ElementType!R : dchar)) { enum infinite = ~0u; dchar _current; bool empty; R pat, origin; //keep full pattern for pretty printing error messages Bytecode[] ir; //resulting bytecode uint re_flags = 0; //global flags e.g. multiline + internal ones Stack!(uint) fixupStack; //stack of opened start instructions NamedGroup[] dict; //maps name -> user group number //current num of group, group nesting level and repetitions step Stack!(uint) groupStack; uint nesting = 0; uint lookaroundNest = 0; uint counterDepth = 0; //current depth of nested counted repetitions CodepointSet[] charsets; // const(CharMatcher)[] matchers; // uint[] backrefed; //bitarray for groups @trusted this(S)(R pattern, S flags) if(isSomeString!S) { pat = origin = pattern; //reserve slightly more then avg as sampled from unittests if(!__ctfe) ir.reserve((pat.length*5+2)/4); parseFlags(flags); _current = ' ';//a safe default for freeform parsing next(); try { parseRegex(); } catch(Exception e) { error(e.msg);//also adds pattern location } put(Bytecode(IR.End, 0)); } //mark referenced groups for latter processing void markBackref(uint n) { if(n/32 >= backrefed.length) backrefed.length = n/32 + 1; backrefed[n / 32] |= 1 << (n & 31); } bool isOpenGroup(uint n) { // walk the fixup stack and see if there are groups labeled 'n' // fixup '0' is reserved for alternations return fixupStack.data[1..$]. 
canFind!(fix => ir[fix].code == IR.GroupStart && ir[fix].data == n)(); } @property dchar current(){ return _current; } bool _next() { if(pat.empty) { empty = true; return false; } _current = pat.front; pat.popFront(); return true; } void skipSpace() { while(isWhite(current) && _next()){ } } bool next() { if(re_flags & RegexOption.freeform) { bool r = _next(); skipSpace(); return r; } else return _next(); } void put(Bytecode code) { enforce(ir.length < maxCompiledLength, "maximum compiled pattern length is exceeded"); ir ~= code; } void putRaw(uint number) { enforce(ir.length < maxCompiledLength, "maximum compiled pattern length is exceeded"); ir ~= Bytecode.fromRaw(number); } //parsing number with basic overflow check uint parseDecimal() { uint r = 0; while(std.ascii.isDigit(current)) { if(r >= (uint.max/10)) error("Overflow in decimal number"); r = 10*r + cast(uint)(current-'0'); if(!next()) break; } return r; } //parse control code of form \cXXX, c assumed to be the current symbol dchar parseControlCode() { enforce(next(), "Unfinished escape sequence"); enforce(('a' <= current && current <= 'z') || ('A' <= current && current <= 'Z'), "Only letters are allowed after \\c"); return current & 0x1f; } // @trusted void parseFlags(S)(S flags) {//@@@BUG@@@ text is @system import std.conv; foreach(ch; flags)//flags are ASCII anyway { L_FlagSwitch: switch(ch) { foreach(i, op; __traits(allMembers, RegexOption)) { case RegexOptionNames[i]: if(re_flags & mixin("RegexOption."~op)) throw new RegexException(text("redundant flag specified: ",ch)); re_flags |= mixin("RegexOption."~op); break L_FlagSwitch; } default: throw new RegexException(text("unknown regex flag '",ch,"'")); } } } //parse and store IR for regex pattern @trusted void parseRegex() { fixupStack.push(0); groupStack.push(1);//0 - whole match auto maxCounterDepth = counterDepth; uint fix;//fixup pointer while(!empty) { debug(std_regex_parser) writeln("*LR*\nSource: ", pat, "\nStack: ",fixupStack.data); 
switch(current) { case '(': next(); nesting++; uint nglob; fixupStack.push(cast(uint)ir.length); if(current == '?') { next(); switch(current) { case ':': put(Bytecode(IR.Nop, 0)); next(); break; case '=': genLookaround(IR.LookaheadStart); next(); break; case '!': genLookaround(IR.NeglookaheadStart); next(); break; case 'P': next(); if(current != '<') error("Expected '<' in named group"); string name; if(!next() || !(isAlpha(current) || current == '_')) error("Expected alpha starting a named group"); name ~= current; while(next() && (isAlpha(current) || current == '_' || std.ascii.isDigit(current))) { name ~= current; } if(current != '>') error("Expected '>' closing named group"); next(); nglob = groupStack.top++; enforce(groupStack.top <= maxGroupNumber, "limit on submatches is exceeded"); auto t = NamedGroup(name, nglob); auto d = assumeSorted!"a.name < b.name"(dict); auto ind = d.lowerBound(t).length; insertInPlace(dict, ind, t); put(Bytecode(IR.GroupStart, nglob)); break; case '<': next(); if(current == '=') genLookaround(IR.LookbehindStart); else if(current == '!') genLookaround(IR.NeglookbehindStart); else error("'!' or '=' expected after '<'"); next(); break; default: error(" ':', '=', '<', 'P' or '!' expected after '(?' 
"); } } else { nglob = groupStack.top++; enforce(groupStack.top <= maxGroupNumber, "limit on number of submatches is exceeded"); put(Bytecode(IR.GroupStart, nglob)); } break; case ')': enforce(nesting, "Unmatched ')'"); nesting--; next(); fix = fixupStack.pop(); switch(ir[fix].code) { case IR.GroupStart: put(Bytecode(IR.GroupEnd,ir[fix].data)); parseQuantifier(fix); break; case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart: assert(lookaroundNest); fixLookaround(fix); lookaroundNest--; break; case IR.Option: //| xxx ) //two fixups: last option + full OR finishAlternation(fix); fix = fixupStack.top; switch(ir[fix].code) { case IR.GroupStart: fixupStack.pop(); put(Bytecode(IR.GroupEnd,ir[fix].data)); parseQuantifier(fix); break; case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart: assert(lookaroundNest); lookaroundNest--; fix = fixupStack.pop(); fixLookaround(fix); break; default://(?:xxx) fixupStack.pop(); parseQuantifier(fix); } break; default://(?:xxx) parseQuantifier(fix); } break; case '|': next(); fix = fixupStack.top; if(ir.length > fix && ir[fix].code == IR.Option) { ir[fix] = Bytecode(ir[fix].code, cast(uint)ir.length - fix); put(Bytecode(IR.GotoEndOr, 0)); fixupStack.top = cast(uint)ir.length; //replace latest fixup for Option put(Bytecode(IR.Option, 0)); break; } uint len, orStart; //start a new option if(fixupStack.length == 1) {//only root entry, effectively no fixup len = cast(uint)ir.length + IRL!(IR.GotoEndOr); orStart = 0; } else {//IR.lookahead, etc. 
fixups that have length > 1, thus check ir[x].length len = cast(uint)ir.length - fix - (ir[fix].length - 1); orStart = fix + ir[fix].length; } insertInPlace(ir, orStart, Bytecode(IR.OrStart, 0), Bytecode(IR.Option, len)); assert(ir[orStart].code == IR.OrStart); put(Bytecode(IR.GotoEndOr, 0)); fixupStack.push(orStart); //fixup for StartOR fixupStack.push(cast(uint)ir.length); //for second Option put(Bytecode(IR.Option, 0)); break; default://no groups or whatever uint start = cast(uint)ir.length; parseAtom(); parseQuantifier(start); } } if(fixupStack.length != 1) { fix = fixupStack.pop(); enforce(ir[fix].code == IR.Option, "no matching ')'"); finishAlternation(fix); enforce(fixupStack.length == 1, "no matching ')'"); } } //helper function, finalizes IR.Option, fix points to the first option of sequence void finishAlternation(uint fix) { enforce(ir[fix].code == IR.Option, "no matching ')'"); ir[fix] = Bytecode(ir[fix].code, cast(uint)ir.length - fix - IRL!(IR.OrStart)); fix = fixupStack.pop(); enforce(ir[fix].code == IR.OrStart, "no matching ')'"); ir[fix] = Bytecode(IR.OrStart, cast(uint)ir.length - fix - IRL!(IR.OrStart)); put(Bytecode(IR.OrEnd, cast(uint)ir.length - fix - IRL!(IR.OrStart))); uint pc = fix + IRL!(IR.OrStart); while(ir[pc].code == IR.Option) { pc = pc + ir[pc].data; if(ir[pc].code != IR.GotoEndOr) break; ir[pc] = Bytecode(IR.GotoEndOr, cast(uint)(ir.length - pc - IRL!(IR.OrEnd))); pc += IRL!(IR.GotoEndOr); } put(Bytecode.fromRaw(0)); } //parse and store IR for atom-quantifier pair @trusted void parseQuantifier(uint offset) {//copy is @system uint replace = ir[offset].code == IR.Nop; if(empty && !replace) return; uint min, max; switch(current) { case '*': min = 0; max = infinite; break; case '?': min = 0; max = 1; break; case '+': min = 1; max = infinite; break; case '{': enforce(next(), "Unexpected end of regex pattern"); enforce(std.ascii.isDigit(current), "First number required in repetition"); min = parseDecimal(); if(current == '}') max = min; 
else if(current == ',') { next(); if(std.ascii.isDigit(current)) max = parseDecimal(); else if(current == '}') max = infinite; else error("Unexpected symbol in regex pattern"); skipSpace(); if(current != '}') error("Unmatched '{' in regex pattern"); } else error("Unexpected symbol in regex pattern"); if(min > max) error("Illegal {n,m} quantifier"); break; default: if(replace) { copy(ir[offset + 1 .. $], ir[offset .. $ - 1]); ir.length -= 1; } return; } uint len = cast(uint)ir.length - offset - replace; bool greedy = true; //check only if we managed to get new symbol if(next() && current == '?') { greedy = false; next(); } if(max != infinite) { if(min != 1 || max != 1) { Bytecode op = Bytecode(greedy ? IR.RepeatStart : IR.RepeatQStart, len); if(replace) ir[offset] = op; else insertInPlace(ir, offset, op); put(Bytecode(greedy ? IR.RepeatEnd : IR.RepeatQEnd, len)); put(Bytecode.init); //hotspot putRaw(1); putRaw(min); putRaw(max); counterDepth = std.algorithm.max(counterDepth, nesting+1); } } else if(min) //&& max is infinite { if(min != 1) { Bytecode op = Bytecode(greedy ? IR.RepeatStart : IR.RepeatQStart, len); if(replace) ir[offset] = op; else insertInPlace(ir, offset, op); offset += 1;//so it still points to the repeated block put(Bytecode(greedy ? IR.RepeatEnd : IR.RepeatQEnd, len)); put(Bytecode.init); //hotspot putRaw(1); putRaw(min); putRaw(min); counterDepth = std.algorithm.max(counterDepth, nesting+1); } else if(replace) { copy(ir[offset+1 .. $], ir[offset .. $-1]); ir.length -= 1; } put(Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len)); enforce(ir.length + len < maxCompiledLength, "maximum compiled pattern length is exceeded"); ir ~= ir[offset .. offset+len]; //IR.InfinteX is always a hotspot put(Bytecode(greedy ? IR.InfiniteEnd : IR.InfiniteQEnd, len)); put(Bytecode.init); //merge index } else//vanila {0,inf} { Bytecode op = Bytecode(greedy ? 
IR.InfiniteStart : IR.InfiniteQStart, len); if(replace) ir[offset] = op; else insertInPlace(ir, offset, op); //IR.InfinteX is always a hotspot put(Bytecode(greedy ? IR.InfiniteEnd : IR.InfiniteQEnd, len)); put(Bytecode.init); //merge index } } //parse and store IR for atom void parseAtom() { if(empty) return; switch(current) { case '*', '?', '+', '|', '{', '}': error("'*', '+', '?', '{', '}' not allowed in atom"); break; case '.': put(Bytecode(IR.Any, 0)); next(); break; case '[': parseCharset(); break; case '\\': enforce(_next(), "Unfinished escape sequence"); parseEscape(); break; case '^': put(Bytecode(IR.Bol, 0)); next(); break; case '$': put(Bytecode(IR.Eol, 0)); next(); break; default: //FIXME: getCommonCasing in new std uni if(re_flags & RegexOption.casefold) { auto range = simpleCaseFoldings(current); assert(range.length <= 5); if(range.length == 1) put(Bytecode(IR.Char, range.front)); else foreach(v; range) put(Bytecode(IR.OrChar, v, cast(uint)range.length)); } else put(Bytecode(IR.Char, current)); next(); } } //generate code for start of lookaround: (?= (?! 
(?<= (?= 0) { if(ivals.length*2 > maxCharsetUsed) put(Bytecode(IR.Trie, cast(uint)n)); else put(Bytecode(IR.CodepointSet, cast(uint)n)); return; } if(ivals.length*2 > maxCharsetUsed) { auto t = getMatcher(set); put(Bytecode(IR.Trie, cast(uint)matchers.length)); matchers ~= t; debug(std_regex_allocation) writeln("Trie generated"); } else { put(Bytecode(IR.CodepointSet, cast(uint)charsets.length)); matchers ~= CharMatcher.init; } charsets ~= set; assert(charsets.length == matchers.length); } } //parse and generate IR for escape stand alone escape sequence @trusted void parseEscape() {//accesses array of appender switch(current) { case 'f': next(); put(Bytecode(IR.Char, '\f')); break; case 'n': next(); put(Bytecode(IR.Char, '\n')); break; case 'r': next(); put(Bytecode(IR.Char, '\r')); break; case 't': next(); put(Bytecode(IR.Char, '\t')); break; case 'v': next(); put(Bytecode(IR.Char, '\v')); break; case 'd': next(); charsetToIr(unicode.Nd); break; case 'D': next(); charsetToIr(unicode.Nd.inverted); break; case 'b': next(); put(Bytecode(IR.Wordboundary, 0)); break; case 'B': next(); put(Bytecode(IR.Notwordboundary, 0)); break; case 's': next(); charsetToIr(unicode.White_Space); break; case 'S': next(); charsetToIr(unicode.White_Space.inverted); break; case 'w': next(); charsetToIr(wordCharacter); break; case 'W': next(); charsetToIr(wordCharacter.inverted); break; case 'p': case 'P': auto CodepointSet = parseUnicodePropertySpec(current == 'P'); charsetToIr(CodepointSet); break; case 'x': uint code = parseUniHex(pat, 2); next(); put(Bytecode(IR.Char,code)); break; case 'u': case 'U': uint code = parseUniHex(pat, current == 'u' ? 4 : 8); next(); put(Bytecode(IR.Char, code)); break; case 'c': //control codes Bytecode code = Bytecode(IR.Char, parseControlCode()); next(); put(code); break; case '0': next(); put(Bytecode(IR.Char, 0));//NUL character break; case '1': .. 
case '9': uint nref = cast(uint)current - '0'; uint maxBackref = sum(groupStack.data); enforce(nref < maxBackref, "Backref to unseen group"); //perl's disambiguation rule i.e. //get next digit only if there is such group number while(nref < maxBackref && next() && std.ascii.isDigit(current)) { nref = nref * 10 + current - '0'; } if(nref >= maxBackref) nref /= 10; enforce(!isOpenGroup(nref), "Backref to open group"); uint localLimit = maxBackref - groupStack.top; if(nref >= localLimit) { put(Bytecode(IR.Backref, nref-localLimit)); ir[$-1].setLocalRef(); } else put(Bytecode(IR.Backref, nref)); markBackref(nref); break; default: auto op = Bytecode(IR.Char, current); next(); put(op); } } //parse and return a CodepointSet for \p{...Property...} and \P{...Property..}, //\ - assumed to be processed, p - is current CodepointSet parseUnicodePropertySpec(bool negated) { enum MAX_PROPERTY = 128; char[MAX_PROPERTY] result; uint k = 0; enforce(next(), "eof parsing unicode property spec"); if(current == '{') { while(k < MAX_PROPERTY && next() && current !='}' && current !=':') if(current != '-' && current != ' ' && current != '_') result[k++] = cast(char)std.ascii.toLower(current); enforce(k != MAX_PROPERTY, "invalid property name"); enforce(current == '}', "} expected "); } else {//single char properties e.g.: \pL, \pN ... 
enforce(current < 0x80, "invalid property name"); result[k++] = cast(char)current; } auto s = getUnicodeSet(result[0..k], negated, cast(bool)(re_flags & RegexOption.casefold)); enforce(!s.empty, "unrecognized unicode property spec"); next(); return s; } // @trusted void error(string msg) { import std.format; auto app = appender!string(); ir = null; formattedWrite(app, "%s\nPattern with error: `%s` <--HERE-- `%s`", msg, origin[0..$-pat.length], pat); throw new RegexException(app.data); } alias Char = BasicElementOf!R; @property program() { return makeRegex(this); } } /+ lightweight post process step, only essentials +/ @trusted void lightPostprocess(Char)(ref Regex!Char zis) {//@@@BUG@@@ write is @system with(zis) { struct FixedStack(T) { T[] arr; uint _top; //this(T[] storage){ arr = storage; _top = -1; } @property ref T top(){ assert(!empty); return arr[_top]; } void push(T x){ arr[++_top] = x; } T pop() { assert(!empty); return arr[_top--]; } @property bool empty(){ return _top == -1; } } auto counterRange = FixedStack!uint(new uint[maxCounterDepth+1], -1); counterRange.push(1); ulong cumRange = 0; for(uint i = 0; i < ir.length; i += ir[i].length) { if(ir[i].hotspot) { assert(i + 1 < ir.length, "unexpected end of IR while looking for hotspot"); ir[i+1] = Bytecode.fromRaw(hotspotTableSize); hotspotTableSize += counterRange.top; } switch(ir[i].code) { case IR.RepeatStart, IR.RepeatQStart: uint repEnd = cast(uint)(i + ir[i].data + IRL!(IR.RepeatStart)); assert(ir[repEnd].code == ir[i].paired.code); uint max = ir[repEnd + 4].raw; ir[repEnd+2].raw = counterRange.top; ir[repEnd+3].raw *= counterRange.top; ir[repEnd+4].raw *= counterRange.top; ulong cntRange = cast(ulong)(max)*counterRange.top; cumRange += cntRange; enforce(cumRange < maxCumulativeRepetitionLength, "repetition length limit is exceeded"); counterRange.push(cast(uint)cntRange + counterRange.top); threadCount += counterRange.top; break; case IR.RepeatEnd, IR.RepeatQEnd: threadCount += counterRange.top; 
counterRange.pop(); break; case IR.GroupStart: if(isBackref(ir[i].data)) ir[i].setBackrefence(); threadCount += counterRange.top; break; case IR.GroupEnd: if(isBackref(ir[i].data)) ir[i].setBackrefence(); threadCount += counterRange.top; break; default: threadCount += counterRange.top; } } checkIfOneShot(); if(!(flags & RegexInfo.oneShot)) kickstart = Kickstart!Char(zis, new uint[](256)); debug(std_regex_allocation) writefln("IR processed, max threads: %d", threadCount); optimize(zis); } } void fixupBytecode()(Bytecode[] ir) { Stack!uint fixups; with(IR) for(uint i=0; i