//Written in the D programming language
/*
    Implementation of a concept "NFA in a word" which is a
    bit-parallel implementation of regex where each bit represents
    a state in an NFA. Execution is Thompson-style achieved via bit tricks.

    There is a great number of limitations including not tracking any state (captures)
    and not supporting even basic assertions such as ^, $ or \b.
*/
module std.regex.internal.bitnfa;

package(std.regex):

import std.regex.internal.ir;

debug(std_regex_bitnfa) import std.stdio;

// Minimal hash table mapping uint keys to uint values.
// Collisions are resolved by linear probing (see locate); the table length
// is always a power of two so a hash is reduced to a slot with a bit mask.
// Copying is disabled; there is no removal operation.
struct HashTab()
{
    @disable this(this);

    // Returns the value stored under `key`. The key must already be present.
    uint opIndex(uint key)
    {
        // locate() masks with table.length-1 and reads through table.ptr,
        // so an empty table must be rejected before probing
        assert(table.length != 0, "HashTab: lookup in an empty table");
        auto p = locate(key, table);
        assert(p.occupied);
        return p.value;
    }

    // Stores `value` under `key`, inserting the key if it is not present yet.
    void opIndexAssign(uint value, uint key)
    {
        if (table.length == 0)
            grow();
        auto p = locate(key, table);
        if (!p.occupied)
        {
            items++;
            // keep the table less than 3/4 full so that probing in locate()
            // always reaches an unoccupied slot
            if (4*items >= table.length*3)
            {
                grow();
                // the table was rebuilt, the old slot pointer is stale
                p = locate(key, table);
            }
            p.occupied = true;
            p.key = key;
        }
        p.value = value;
    }

    // Returns a newly built array with all stored keys, in table order.
    auto keys()
    {
        import std.array : appender;
        auto app = appender!(uint[])();
        foreach (ref v; table)
        {
            if (v.occupied)
                app.put(v.key);
        }
        return app.data;
    }

    // Returns a newly built array with all stored values, in table order
    // (the same order as used by keys()).
    auto values()
    {
        import std.array : appender;
        auto app = appender!(uint[])();
        foreach (ref v; table)
        {
            if (v.occupied)
                app.put(v.value);
        }
        return app.data;
    }

private:
    // Mixes higher bits of the key into the low bits used by the slot mask.
    static uint hashOf(uint val)
    {
        return (val >> 20) ^ (val>>8) ^ val;
    }

    struct Node
    {
        uint key;
        uint value;
        bool occupied;
    }

    Node[] table;   // slots; length is zero or a power of two
    size_t items;   // number of occupied slots

    // Finds the slot that holds `key`, or the first unoccupied slot of its
    // probe sequence if the key is absent. `table` must be non-empty, have
    // a power-of-two length and contain at least one unoccupied slot.
    static Node* locate(uint key, Node[] table)
    {
        size_t slot = hashOf(key) & (table.length-1);
        while (table.ptr[slot].occupied)
        {
            if (table.ptr[slot].key == key)
                break;
            slot += 1;
            if (slot == table.length)
                slot = 0;
        }
        return table.ptr+slot;
    }

    // Doubles the table (or allocates the initial 4 slots) and re-inserts
    // every occupied node into the new storage.
    void grow()
    {
        Node[] newTable = new Node[table.length ? table.length*2 : 4];
        foreach (ref v; table)
        {
            if (v.occupied)
            {
                auto p = locate(v.key, newTable);
                *p = v;
            }
        }
        // switch over to the enlarged storage; without this the rehashed
        // table would be discarded and the table would never grow
        table = newTable;
    }
}

// Specialized 2-level trie of uint masks for BitNfa.
// Uses the concept of CoW: a page gets modified in place
// if the block's ref-count is 1, else a new block is allocated
// and the ref count is decreased.
// NOTE: lookup and the mutating operators below are still stubs (see TODOs).
struct UIntTrie2
{
    ushort[] index;         // pages --> blocks
    ushort[] refCounts;     // ref counts for each block
    uint[] hashes;          // hashes of blocks
    uint[] blocks;          // linear array with blocks
    uint[] scratch;         // temporary block
    enum blockSize = 2<<8;  // size of block

    // Folds all elements of `data` into a single uint:
    // starts from 5183 and applies h = 31*h + element for each element.
    static uint hash(uint[] data)
    {
        uint acc = 5183;
        for (size_t pos = 0; pos < data.length; ++pos)
            acc = acc * 31 + data[pos];
        return acc;
    }

    // Builds the initial trie: every page of the index refers to block 0,
    // which is filled with all-ones masks.
    static UIntTrie2 opCall()
    {
        enum pageCount = 2<<13;

        UIntTrie2 trie;
        // zero-initialized, i.e. each page maps to block 0
        trie.index = new ushort[pageCount];

        auto firstBlock = new uint[blockSize];
        firstBlock[] = uint.max; // all ones
        trie.blocks = firstBlock;
        trie.scratch = new uint[blockSize];

        // block 0 is referenced by every page
        trie.refCounts = new ushort[1];
        trie.refCounts[0] = pageCount;

        trie.hashes = new uint[1];
        trie.hashes[0] = hash(firstBlock);
        return trie;
    }

    // Lookup for a code point; not implemented yet, always yields false.
    bool opIndex(dchar ch)
    {
        return false; // TODO: stub
    }

    // Compound assignment for a single code point; not implemented yet.
    void opIndexOpAssign(string op)(uint val, dchar ch)
    {
        // TODO: stub
    }

    // Compound assignment over a range given by `start` and `end`; not implemented yet.
    void opSliceOpAssign(string op)(uint val, uint start, uint end)
    {
        // TODO: stub
    }
}

// Since there is no way to mark a starting position
// we need 2 instances of BitNfa: one to find the end, and the other
// to run backwards to find the start.
struct BitNfa { uint[128] asciiTab; // state mask for ascii characters UIntTrie2 uniTab; // state mask for unicode characters uint[uint] controlFlow; // maps each bit pattern to resulting jumps pattern uint controlFlowMask; // masks all control flow bits uint finalMask; // marks final states terminating the NFA bool empty; // if this engine is empty void combineControlFlow() { uint[] keys = controlFlow.keys; uint[] values = controlFlow.values; auto selection = new bool[keys.length]; bool nextChoice() { uint i; for(i=0;i %d %s", j, ir[j].mnemonic); paths.push(j+IRL!Option); //writefln(">> %d", j+IRL!Option); j = j + ir[j].data + IRL!Option; } break; case GotoEndOr: paths.push(j+IRL!GotoEndOr+ir[j].data); break; case OrEnd, Wordboundary, Notwordboundary, Bol, Eol, Nop, GroupStart, GroupEnd: paths.push(j+ir[j].length); break; case LookaheadStart, NeglookaheadStart, LookbehindStart, NeglookbehindStart: paths.push(j + IRL!LookaheadStart + ir[j].data + IRL!LookaheadEnd); break; case InfiniteStart, InfiniteQStart: paths.push(j+IRL!InfiniteStart); paths.push(j+ir[j].data+IRL!InfiniteEnd); break; case InfiniteBloomStart: paths.push(j+IRL!InfiniteStart); paths.push(j+ir[j].data+IRL!InfiniteBloomEnd); break; case InfiniteEnd, InfiniteQEnd: paths.push(j-ir[j].data); paths.push(j+IRL!InfiniteEnd); break; case InfiniteBloomEnd: paths.push(j-ir[j].data); paths.push(j+IRL!InfiniteBloomEnd); break; default: result ~= j; } } return result; } this(Char)(auto ref Regex!Char re) { asciiTab[] = uint.max; // all ones uniTab = UIntTrie2(); controlFlow[0] = 0; // pc -> bit number uint[] bitMapping = new uint[re.ir.length]; uint bitCount = 0, nesting=0, lastNonnested=0; bool stop = false; with(re) outer: for(uint i=0; i