From 42eb21616fe0b2bb14c659773e7b9e67230a89b2 Mon Sep 17 00:00:00 2001 From: Dmitry Olshansky Date: Sun, 17 Nov 2013 14:05:53 +0400 Subject: [PATCH 1/4] Make MultiArray CTFE-able, workaround CTFE bug. --- std/uni.d | 152 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/std/uni.d b/std/uni.d index a4d2f877a..4b8c4d7bc 100644 --- a/std/uni.d +++ b/std/uni.d @@ -890,7 +890,7 @@ struct MultiArray(Types...) static if(n != dim-1) { auto start = raw_ptr!(n+1); - size_t len = storage.length; + size_t len = (storage.ptr+storage.length-start); copyForward(start[0..len-delta], start[delta..len]); // adjust offsets last, they affect raw_slice @@ -941,69 +941,74 @@ private: unittest { - // sizes are: - // lvl0: 3, lvl1 : 2, lvl2: 1 - auto m = MultiArray!(int, ubyte, int)(3,2,1); + enum dg = (){ + // sizes are: + // lvl0: 3, lvl1 : 2, lvl2: 1 + auto m = MultiArray!(int, ubyte, int)(3,2,1); - static void check(size_t k, T)(ref T m, int n) - { - foreach(i; 0..n) - assert(m.slice!(k)[i] == i+1, text("level:",i," : ",m.slice!(k)[0..n])); - } + static void check(size_t k, T)(ref T m, int n) + { + foreach(i; 0..n) + assert(m.slice!(k)[i] == i+1, text("level:",i," : ",m.slice!(k)[0..n])); + } - static void checkB(size_t k, T)(ref T m, int n) - { - foreach(i; 0..n) - assert(m.slice!(k)[i] == n-i, text("level:",i," : ",m.slice!(k)[0..n])); - } + static void checkB(size_t k, T)(ref T m, int n) + { + foreach(i; 0..n) + assert(m.slice!(k)[i] == n-i, text("level:",i," : ",m.slice!(k)[0..n])); + } - static void fill(size_t k, T)(ref T m, int n) - { - foreach(i; 0..n) - m.slice!(k)[i] = force!ubyte(i+1); - } + static void fill(size_t k, T)(ref T m, int n) + { + foreach(i; 0..n) + m.slice!(k)[i] = force!ubyte(i+1); + } - static void fillB(size_t k, T)(ref T m, int n) - { - foreach(i; 0..n) - m.slice!(k)[i] = force!ubyte(n-i); - } + static void fillB(size_t k, T)(ref T m, int n) + { + foreach(i; 0..n) + m.slice!(k)[i] = force!ubyte(n-i); + } - m.length!1 = 100; - fill!1(m, 100); - check!1(m, 100); + m.length!1 = 100; + fill!1(m, 100); + check!1(m, 100); - m.length!0 = 220; - fill!0(m, 220); - check!1(m, 100); - check!0(m, 220); + m.length!0 = 220; + fill!0(m, 220); + check!1(m, 100); + check!0(m, 220); - m.length!2 = 17; - fillB!2(m, 17); - checkB!2(m, 17); - check!0(m, 220); - check!1(m, 100); + m.length!2 = 17; + fillB!2(m, 17); + checkB!2(m, 17); + check!0(m, 220); + check!1(m, 100); - m.length!2 = 33; - checkB!2(m, 17); - fillB!2(m, 33); - checkB!2(m, 33); - check!0(m, 220); - check!1(m, 100); + m.length!2 = 33; + checkB!2(m, 17); + fillB!2(m, 33); + checkB!2(m, 33); + check!0(m, 220); + check!1(m, 100); - m.length!1 = 195; - fillB!1(m, 195); - checkB!1(m, 195); - checkB!2(m, 33); - check!0(m, 220); + m.length!1 = 195; + fillB!1(m, 195); + checkB!1(m, 195); + checkB!2(m, 33); + check!0(m, 220); - auto marr = MultiArray!(BitPacked!(uint, 4), BitPacked!(uint, 6))(20, 10); - marr.length!0 = 15; - marr.length!1 = 30; - fill!1(marr, 30); - fill!0(marr, 15); - check!1(marr, 30); - check!0(marr, 15); + auto marr = MultiArray!(BitPacked!(uint, 4), BitPacked!(uint, 6))(20, 10); + marr.length!0 = 15; + marr.length!1 = 30; + fill!1(marr, 30); + fill!0(marr, 15); + check!1(marr, 30); + check!0(marr, 15); + return 0; + }; + enum ct = dg(); + auto rt = dg(); } unittest @@ -1134,6 +1139,22 @@ pure nothrow: } } + private void simpleWrite(TypeOfBitPacked!T val, size_t n) + in + { + static if(isIntegral!T) + assert(val <= mask); + } + body + { + auto q = n / factor; + auto r = n % factor; + size_t tgt_shift = bits*r; + size_t word = origin[q]; + origin[q] = (word & ~(mask< Date: Sun, 17 Nov 2013 17:42:47 +0400 Subject: [PATCH 2/4] refactor TrieBuilder --- std/uni.d | 180 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 104 insertions(+), 76 deletions(-) diff --git a/std/uni.d b/std/uni.d index 4b8c4d7bc..dc338a2e6 100644 --- a/std/uni.d +++ b/std/uni.d @@ -1232,6 +1232,48 @@ pure nothrow: limit = items; } + bool zeros(size_t s, size_t e) + in + { + assert(s % factor == 0); + assert(e % factor == 0); + assert(s < e); + } + body + { + size_t* p = ptr.origin; + auto count = (e - s)/factor; + size_t val; + foreach(i; 0..count) + { + val |= p[i]; + if(val) + return false; + } + return true; + } + + bool ones(size_t s, size_t e) + in + { + assert(s % factor == 0); + assert(e % factor == 0); + assert(s < e); + } + body + { + size_t* p = ptr.origin; + auto count = (e - s)/factor; + size_t val; + foreach(i; 0..count) + { + val |= p[i]; + if(val != size_t.max) + return false; + } + return true; + } + T opIndex(size_t idx) inout in { @@ -3416,7 +3458,7 @@ private: uint idx_zeros, idx_ones; } // iteration over levels of Trie, each indexes its own level and thus a shortened domain - size_t[Prefix.length] indices; + size_t[Prefix.length] indices; // default filler value to use Value defValue; // this is a full-width index of next item @@ -3428,91 +3470,82 @@ private: @disable this(); + //shortcut for index variable at level 'level' + @property ref idx(size_t level)(){ return indices[level]; } + // this function assumes no holes in the input so // indices are going one by one void addValue(size_t level, T)(T val, size_t numVals) { + alias j = idx!level; enum pageSize = 1< n) - numVals -= n; - else - { - n = numVals; - numVals = 0; - } - static if(level < Prefix.length-1) - assert(indices[level] <= 2^^Prefix[level+1].bitSize); - ptr[j..j+n] = val; - j += n; - indices[level] = j; - } - // last level (i.e. topmost) has 1 "page" - // thus it need not to add a new page on upper level - static if(level != 0) - { - if(indices[level] % pageSize == 0) - spillToNextPage!level(ptr); + {// can incur narrowing conversion + assert(j < ptr.length); + ptr[j] = force!(typeof(ptr[j]))(val); } + j++; + if(j % pageSize == 0) + spillToNextPage!level(ptr); + return; } - while(numVals); + // longer row of values + // get to the next page boundary + size_t nextPB = (j + pageSize) & ~(pageSize-1); + size_t n = nextPB - j;// can fill right in this page + if(numVals < n) //fits in current page + { + ptr[j..j+numVals] = val; + j += numVals; + return; + } + numVals -= n; + //write till the end of current page + ptr[j..j+n] = val; + j += n; + //spill to the next page + spillToNextPage!level(ptr); + // page at once loop + while(numVals >= pageSize) + { + numVals -= pageSize; + ptr[j..j+pageSize] = val; + j += pageSize; + spillToNextPage!level(ptr); + } + if(numVals) + { + // the leftovers, an incomplete page + ptr[j..j+numVals] = val; + j += numVals; + } + } + + void spillToNextPage(size_t level, Slice)(ref Slice ptr) + { + // last level (i.e. topmost) has 1 "page" + // thus it need not to add a new page on upper level + static if(level != 0) + spillToNextPageImpl!(level)(ptr); } // this can re-use the current page if duplicate or allocate a new one // it also makes sure that previous levels point to the correct page in this level - void spillToNextPage(size_t level, Slice)(ref Slice ptr) + void spillToNextPageImpl(size_t level, Slice)(ref Slice ptr) { alias typeof(table.slice!(level-1)[0]) NextIdx; NextIdx next_lvl_index; enum pageSize = 1< Date: Sun, 17 Nov 2013 20:16:21 +0400 Subject: [PATCH 3/4] tweak algorithm to actually fast-track zero-pages Also significantly speed up replicateBits for single bit pattern case. --- std/uni.d | 101 ++++++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/std/uni.d b/std/uni.d index dc338a2e6..a8db13b25 100644 --- a/std/uni.d +++ b/std/uni.d @@ -798,6 +798,13 @@ size_t replicateBits(size_t times, size_t bits)(size_t val) { static if(times == 1) return val; + else static if(bits == 1) + { + static if(times == size_t.sizeof*8) + return val ? size_t.max : 0; + else + return val ? (1<= pageSize) + static if(level != 0)//on the first level it always fits { - numVals -= pageSize; - ptr[j..j+pageSize] = val; - j += pageSize; + numVals -= n; + //write till the end of current page + ptr[j..j+n] = val; + j += n; + //spill to the next page spillToNextPage!level(ptr); - } - if(numVals) - { - // the leftovers, an incomplete page - ptr[j..j+numVals] = val; - j += numVals; + // page at once loop + if(state[level].idx_zeros != size_t.max && val == T.init) + { + alias typeof(table.slice!(level-1)[0]) NextIdx; + addValue!(level-1)(force!NextIdx(state[level].idx_zeros), + numVals/pageSize); + ptr = table.slice!level; //table structure might have changed + numVals %= pageSize; + } + else + { + while(numVals >= pageSize) + { + numVals -= pageSize; + ptr[j..j+pageSize] = val; + j += pageSize; + spillToNextPage!level(ptr); + } + } + if(numVals) + { + // the leftovers, an incomplete page + ptr[j..j+numVals] = val; + j += numVals; + } } } @@ -3571,6 +3562,10 @@ private: { L_allocate_page: next_lvl_index = force!NextIdx(idx!level/pageSize - 1); + if(state[level].idx_zeros == size_t.max && ptr.zeros(j, j+pageSize)) + { + state[level].idx_zeros = next_lvl_index; + } // allocate next page version(none) { @@ -3628,7 +3623,7 @@ public: defValue = filler; // zeros-page index, ones-page index foreach(ref v; state) - v = ConstructState(true, true, uint.max, uint.max); + v = ConstructState(size_t.max, size_t.max); table = typeof(table)(indices); // one page per level is a bootstrap minimum foreach(i; Sequence!(0, Prefix.length)) From cf7c701c080ad4753c823bf06893b1066e6cbe97 Mon Sep 17 00:00:00 2001 From: Dmitry Olshansky Date: Sun, 17 Nov 2013 21:21:27 +0400 Subject: [PATCH 4/4] other tweaks for std.regex/ctRegex --- std/uni.d | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/std/uni.d b/std/uni.d index a8db13b25..6d3aa021c 100644 --- a/std/uni.d +++ b/std/uni.d @@ -1131,19 +1131,9 @@ pure nothrow: private T simpleIndex(size_t n) inout { - static if(factor == bytesPerWord*8) - { - // a re-write with less data dependency - auto q = n / factor; - auto r = n % factor; - return cast(T)(origin[q] & (mask<> bits*r) & mask); - } + auto q = n / factor; + auto r = n % factor; + return cast(T)((origin[q] >> bits*r) & mask); } private void simpleWrite(TypeOfBitPacked!T val, size_t n) @@ -1696,11 +1686,14 @@ unittest static void destroy(T)(ref T arr) if(isDynamicArray!T && is(Unqual!T == T)) { - debug + version(bug10929) //@@@BUG@@@ { - arr[] = cast(typeof(T.init[0]))(0xdead_beef); + debug + { + arr[] = cast(typeof(T.init[0]))(0xdead_beef); + } + arr = null; } - arr = null; } static void destroy(T)(ref T arr) @@ -2005,6 +1998,13 @@ public: end = sp.length; } + this(Uint24Array!SP sp, size_t s, size_t e) + { + slice = sp; + start = s; + end = e; + } + @property auto front()const { uint a = slice[start]; @@ -2029,6 +2029,20 @@ public: end -= 2; } + auto opIndex(size_t idx) const + { + uint a = slice[start+idx*2]; + uint b = slice[start+idx*2+1]; + return CodepointInterval(a, b); + } + + auto opSlice(size_t s, size_t e) + { + return Intervals(slice, s*2+start, e*2+start); + } + + @property size_t length()const { return slice.length/2; } + @property bool empty()const { return start == end; } @property auto save(){ return this; } @@ -3742,8 +3756,7 @@ public: idx = cast(size_t)p[0](key); foreach(i, v; p[0..$-1]) idx = cast(size_t)((_table.ptr!i[idx]<