mirror of
https://github.com/dlang/phobos.git
synced 2025-05-01 15:40:36 +03:00
Improved BinaryHeap. Added largestPartialIntersection.
This commit is contained in:
parent
fbec43b21f
commit
574af5bf3e
1 changed files with 269 additions and 113 deletions
382
std/algorithm.d
382
std/algorithm.d
|
@ -1958,7 +1958,7 @@ Tuple!(ElementType!(Range), size_t)
|
||||||
minCount(alias pred = "a < b", Range)(Range range)
|
minCount(alias pred = "a < b", Range)(Range range)
|
||||||
{
|
{
|
||||||
if (range.empty) return typeof(return)();
|
if (range.empty) return typeof(return)();
|
||||||
auto p = &(range.front);
|
auto p = &(range.front());
|
||||||
size_t occurrences = 1;
|
size_t occurrences = 1;
|
||||||
for (range.popFront; !range.empty; range.popFront)
|
for (range.popFront; !range.empty; range.popFront)
|
||||||
{
|
{
|
||||||
|
@ -1966,7 +1966,7 @@ minCount(alias pred = "a < b", Range)(Range range)
|
||||||
if (binaryFun!(pred)(range.front, *p))
|
if (binaryFun!(pred)(range.front, *p))
|
||||||
{
|
{
|
||||||
// change the min
|
// change the min
|
||||||
p = &(range.front);
|
p = &(range.front());
|
||||||
occurrences = 1;
|
occurrences = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -3849,22 +3849,16 @@ void topNIndex(
|
||||||
Range, RangeIndex)(Range r, RangeIndex index, bool sorted = false)
|
Range, RangeIndex)(Range r, RangeIndex index, bool sorted = false)
|
||||||
if (isIntegral!(ElementType!(RangeIndex)))
|
if (isIntegral!(ElementType!(RangeIndex)))
|
||||||
{
|
{
|
||||||
{
|
enforce(ElementType!(RangeIndex).max >= index.length,
|
||||||
size_t i;
|
"Index type too small");
|
||||||
enforce(ElementType!(RangeIndex).max >= index.length,
|
|
||||||
"Index type too small");
|
|
||||||
foreach (ref e; index) e = cast(typeof(e)) i++;
|
|
||||||
}
|
|
||||||
bool indirectLess(ElementType!(RangeIndex) a, ElementType!(RangeIndex) b)
|
bool indirectLess(ElementType!(RangeIndex) a, ElementType!(RangeIndex) b)
|
||||||
{
|
{
|
||||||
return binaryFun!(less)(r[a], r[b]);
|
return binaryFun!(less)(r[a], r[b]);
|
||||||
}
|
}
|
||||||
auto heap = BinaryHeap!(RangeIndex, indirectLess)(index);
|
auto heap = BinaryHeap!(RangeIndex, indirectLess)(index, 0);
|
||||||
foreach (i; index.length .. r.length)
|
foreach (i; 0 .. r.length)
|
||||||
{
|
{
|
||||||
if (!indirectLess(cast(ElementType!(RangeIndex)) i, heap.top)) continue;
|
heap.conditionalPut(cast(ElementType!RangeIndex) i);
|
||||||
// replace the top with e
|
|
||||||
heap.replaceTop(cast(ElementType!(RangeIndex)) i);
|
|
||||||
}
|
}
|
||||||
if (sorted) heap.pop(heap.length);
|
if (sorted) heap.pop(heap.length);
|
||||||
}
|
}
|
||||||
|
@ -3875,21 +3869,15 @@ void topNIndex(
|
||||||
Range, RangeIndex)(Range r, RangeIndex index, bool sorted = false)
|
Range, RangeIndex)(Range r, RangeIndex index, bool sorted = false)
|
||||||
if (is(ElementType!(RangeIndex) == ElementType!(Range)*))
|
if (is(ElementType!(RangeIndex) == ElementType!(Range)*))
|
||||||
{
|
{
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
foreach (ref e; index) e = &r[i++];
|
|
||||||
}
|
|
||||||
static bool indirectLess(const ElementType!(RangeIndex) a,
|
static bool indirectLess(const ElementType!(RangeIndex) a,
|
||||||
const ElementType!(RangeIndex) b)
|
const ElementType!(RangeIndex) b)
|
||||||
{
|
{
|
||||||
return binaryFun!(less)(*a, *b);
|
return binaryFun!less(*a, *b);
|
||||||
}
|
}
|
||||||
auto heap = BinaryHeap!(RangeIndex, indirectLess)(index);
|
auto heap = BinaryHeap!(RangeIndex, indirectLess)(index, 0);
|
||||||
foreach (i; index.length .. r.length)
|
foreach (i; 0 .. r.length)
|
||||||
{
|
{
|
||||||
if (!indirectLess(&r[i], heap.top)) continue;
|
heap.conditionalPut(&r[i]);
|
||||||
// replace the top with e
|
|
||||||
heap.replaceTop(&r[i]);
|
|
||||||
}
|
}
|
||||||
if (sorted) heap.pop(heap.length);
|
if (sorted) heap.pop(heap.length);
|
||||||
}
|
}
|
||||||
|
@ -3908,7 +3896,7 @@ unittest
|
||||||
auto b = new ubyte[5];
|
auto b = new ubyte[5];
|
||||||
topNIndex!("a > b")(a, b, true);
|
topNIndex!("a > b")(a, b, true);
|
||||||
//foreach (e; b) writeln(e, ":", a[e]);
|
//foreach (e; b) writeln(e, ":", a[e]);
|
||||||
assert(b == [ cast(ubyte) 0, 2, 1, 6, 5]);
|
assert(b == [ cast(ubyte) 0, 2, 1, 6, 5], text(b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/+
|
/+
|
||||||
|
@ -4377,12 +4365,13 @@ assert(h.length == 0);
|
||||||
*/
|
*/
|
||||||
struct BinaryHeap(Range, alias less = "a < b") if (isRandomAccessRange!(Range))
|
struct BinaryHeap(Range, alias less = "a < b") if (isRandomAccessRange!(Range))
|
||||||
{
|
{
|
||||||
|
private size_t _length;
|
||||||
private Range _store;
|
private Range _store;
|
||||||
private alias binaryFun!(less) comp;
|
private alias binaryFun!(less) comp;
|
||||||
|
|
||||||
//@@@BUG
|
//@@@BUG
|
||||||
//private static void heapify(Range r, size_t i)
|
//private static void heapify(Range r, size_t i)
|
||||||
void heapify(Range r, size_t i)
|
public void heapify(Range r, size_t i = 0)
|
||||||
{
|
{
|
||||||
auto b = 0;
|
auto b = 0;
|
||||||
for (;;)
|
for (;;)
|
||||||
|
@ -4390,7 +4379,7 @@ struct BinaryHeap(Range, alias less = "a < b") if (isRandomAccessRange!(Range))
|
||||||
auto left = b + (i - b) * 2 + 1, right = left + 1;
|
auto left = b + (i - b) * 2 + 1, right = left + 1;
|
||||||
if (right == r.length)
|
if (right == r.length)
|
||||||
{
|
{
|
||||||
if (binaryFun!(less)(r[i], r[left])) swap(r[i], r[left]);
|
if (comp(r[i], r[left])) swap(r[i], r[left]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (right > r.length) return;
|
if (right > r.length) return;
|
||||||
|
@ -4404,24 +4393,49 @@ struct BinaryHeap(Range, alias less = "a < b") if (isRandomAccessRange!(Range))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pop(Range store)
|
/*static*/ void pop(Range store)
|
||||||
{
|
{
|
||||||
if (store.length <= 1) return;
|
assert(!store.empty);
|
||||||
auto newEnd = store.length - 1;
|
if (store.length == 1) return;
|
||||||
swap(store.front, store[newEnd]);
|
swap(store.front, store.back);
|
||||||
heapify(store[0 .. newEnd], 0);
|
heapify(store[0 .. store.length - 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertValid()
|
||||||
|
{
|
||||||
|
debug
|
||||||
|
{
|
||||||
|
if (_length < 2) return;
|
||||||
|
for (size_t n = _length - 1; n >= 1; --n)
|
||||||
|
{
|
||||||
|
auto parentIdx = (n - 1) / 2;
|
||||||
|
assert(!comp(_store[parentIdx], _store[n]), text(n));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
Converts the range $(D r) into a heap. Performs $(BIGOH r.length)
|
Converts the range $(D r) into a heap. If $(D initialSize) is
|
||||||
evaluations of $(D comp).
|
specified, only the first $(D initialSize) elements in $(D r) are
|
||||||
*/
|
transformed into a heap, after which the heap can grow up to $(D
|
||||||
this(Range r)
|
r.length). Performs $(BIGOH min(r.length, initialSize)) evaluations of
|
||||||
|
$(D less).
|
||||||
|
*/
|
||||||
|
this(Range r, size_t initialSize = size_t.max)
|
||||||
{
|
{
|
||||||
_store = r;
|
acquire(r, initialSize);
|
||||||
if (_store.length < 2) return;
|
}
|
||||||
auto i = (_store.length - 2) / 2;
|
|
||||||
|
/**
|
||||||
|
Takes ownership of a range.
|
||||||
|
*/
|
||||||
|
void acquire(Range r, size_t initialSize = size_t.max)
|
||||||
|
{
|
||||||
|
swap(r, _store);
|
||||||
|
_length = min(_store.length, initialSize);
|
||||||
|
if (_length < 2) return;
|
||||||
|
auto i = (_length - 2) / 2;
|
||||||
// @@@BUG: statement not reachable
|
// @@@BUG: statement not reachable
|
||||||
// for (;;)
|
// for (;;)
|
||||||
// {
|
// {
|
||||||
|
@ -4429,38 +4443,89 @@ evaluations of $(D comp).
|
||||||
// if (i == 0) return;
|
// if (i == 0) return;
|
||||||
// --i;
|
// --i;
|
||||||
// }
|
// }
|
||||||
this.heapify(_store, i);
|
this.heapify(_store[0 .. _length], i);
|
||||||
for (; i-- != 0;)
|
for (; i-- != 0;)
|
||||||
{
|
{
|
||||||
this.heapify(_store, i);
|
this.heapify(_store[0 .. _length], i);
|
||||||
}
|
}
|
||||||
|
assertValid;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Takes ownership of a range. The old content of the store is destroyed.
|
Clears the heap. Returns the contents of the store (which satisfies
|
||||||
*/
|
the $(LUCKY heap property)).
|
||||||
void acquire(Range r)
|
|
||||||
{
|
|
||||||
swap(r, _store);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
Clears the heap. Returns the contents of the store.
|
|
||||||
*/
|
*/
|
||||||
Range release()
|
Range release()
|
||||||
{
|
{
|
||||||
Range result;
|
Range result;
|
||||||
swap(result, _store);
|
swap(result, _store);
|
||||||
|
result = result[0 .. _length];
|
||||||
|
_length = 0;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Adds one element to the heap. If the length of the heap grows beyond
|
||||||
|
the length of the range that the heap was associated with, a new range
|
||||||
|
is allocated.
|
||||||
|
*/
|
||||||
|
void push(ElementType!Range value)
|
||||||
|
{
|
||||||
|
//assertValid;
|
||||||
|
if (_length == _store.length)
|
||||||
|
{
|
||||||
|
// reallocate
|
||||||
|
_store.length = (_length + 1) * 2;
|
||||||
|
}
|
||||||
|
// no reallocation
|
||||||
|
_store[_length] = value;
|
||||||
|
// sink down the element
|
||||||
|
for (size_t n = _length; n; )
|
||||||
|
{
|
||||||
|
auto parentIdx = (n - 1) / 2;
|
||||||
|
if (!comp(_store[parentIdx], _store[n])) break; // done!
|
||||||
|
// must swap and continue
|
||||||
|
swap(_store[parentIdx], _store[n]);
|
||||||
|
n = parentIdx;
|
||||||
|
}
|
||||||
|
++_length;
|
||||||
|
assertValid;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
If $(D this.length < this.capacity), call $(D
|
||||||
|
this.push(value)). Otherwise, if $(D less(value, this.top)), replace
|
||||||
|
$(D this.top) with $(D value) and adjust the heap to maintain the heap
|
||||||
|
property. This function is useful in scenarios where the smallest $(D
|
||||||
|
N) of a large set of candidates must be remembered.
|
||||||
|
|
||||||
|
Returns: $(D true) if $(D value) was put in the heap, $(D false)
|
||||||
|
otherwise.
|
||||||
|
*/
|
||||||
|
bool conditionalPut(ElementType!Range value)
|
||||||
|
{
|
||||||
|
if (_length < _store.length)
|
||||||
|
{
|
||||||
|
push(value);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// must replace the top
|
||||||
|
assert(!_store.empty);
|
||||||
|
if (!comp(value, _store.front)) return false;
|
||||||
|
_store.front = value;
|
||||||
|
heapify(_store[0 .. _length]);
|
||||||
|
assertValid;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Get the _top element (the largest according to the predicate $(D
|
Get the _top element (the largest according to the predicate $(D
|
||||||
less)).
|
less)).
|
||||||
*/
|
*/
|
||||||
ref const(ElementType!(Range)) top()
|
ElementType!Range top()
|
||||||
{
|
{
|
||||||
return _store[0];
|
assert(_length);
|
||||||
|
return _store.front;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4468,10 +4533,10 @@ Pops the largest element (according to the predicate $(D less)).
|
||||||
*/
|
*/
|
||||||
void pop()
|
void pop()
|
||||||
{
|
{
|
||||||
enforce(_store.length);
|
enforce(_length);
|
||||||
if (_store.length > 1) swap(_store.front, _store.back);
|
if (_length > 1) swap(_store.front, _store[_length - 1]);
|
||||||
_store.popBack;
|
--_length;
|
||||||
heapify(_store, 0);
|
heapify(_store[0 .. _length]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4484,33 +4549,31 @@ heap is sorted and returned.
|
||||||
*/
|
*/
|
||||||
Range pop(size_t n)
|
Range pop(size_t n)
|
||||||
{
|
{
|
||||||
immutable size_t newSize = n >= _store.length
|
immutable size_t newSize = n >= _length
|
||||||
? (n = _store.length, 0)
|
? (n = _length, 0)
|
||||||
: _store.length - n;
|
: _length - n;
|
||||||
auto result = _store[newSize .. _store.length];
|
auto result = _store[newSize .. _length];
|
||||||
while (_store.length > newSize)
|
while (_length > newSize)
|
||||||
{
|
{
|
||||||
swap(_store.front, _store.back);
|
swap(_store.front, _store[_length - 1]);
|
||||||
_store.popBack;
|
--_length;
|
||||||
heapify(_store, 0);
|
heapify(_store[0 .. _length]);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Replaces the top element (largest according to $(D less)) with $(D
|
Returns the _length of the heap.
|
||||||
newTop).
|
*/
|
||||||
*/
|
size_t length() const
|
||||||
void replaceTop(ElementType!(Range) newTop)
|
|
||||||
{
|
{
|
||||||
_store.front = newTop;
|
return _length;
|
||||||
heapify!(less)(_store, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Returns the _length of the heap.
|
Returns the length of the range underlying the heap.
|
||||||
*/
|
*/
|
||||||
size_t length()
|
size_t capacity()
|
||||||
{
|
{
|
||||||
return _store.length;
|
return _store.length;
|
||||||
}
|
}
|
||||||
|
@ -4521,28 +4584,30 @@ unittest
|
||||||
{
|
{
|
||||||
// example from "Introduction to Algorithms" Cormen et al., p 146
|
// example from "Introduction to Algorithms" Cormen et al., p 146
|
||||||
int[] a = [ 4, 1, 3, 2, 16, 9, 10, 14, 8, 7 ];
|
int[] a = [ 4, 1, 3, 2, 16, 9, 10, 14, 8, 7 ];
|
||||||
//vnBinaryHeap!(int[]) h = BinaryHeap!(int[])(a);
|
BinaryHeap!(int[]) h = BinaryHeap!(int[])(a);
|
||||||
//makeBinaryHeap!(binaryFun!("a < b"))(a);
|
//makeBinaryHeap!(binaryFun!("a < b"))(a);
|
||||||
// assert(a == [ 16, 14, 10, 8, 7, 9, 3, 2, 4, 1 ]);
|
assert(a == [ 16, 14, 10, 8, 7, 9, 3, 2, 4, 1 ]);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int[] a = [ 4, 1, 3, 2, 16, 9, 10, 14, 8, 7 ];
|
||||||
|
int[] b = new int[a.length];
|
||||||
|
BinaryHeap!(int[]) h = BinaryHeap!(int[])(b, 0);
|
||||||
|
foreach (e; a)
|
||||||
|
{
|
||||||
|
h.push(e);
|
||||||
|
}
|
||||||
|
assert(b == [ 16, 14, 10, 8, 7, 3, 9, 1, 4, 2 ], text(b));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int[] a = [ 4, 1, 3, 2, 16, 9, 10, 14, 8, 7 ];
|
||||||
|
BinaryHeap!(int[]) h = BinaryHeap!(int[])(a);
|
||||||
|
//makeBinaryHeap!(binaryFun!("a < b"))(a);
|
||||||
|
auto b = h.pop(5);
|
||||||
|
assert(b == [ 8, 9, 10, 14, 16 ]);
|
||||||
|
b = h.pop(5);
|
||||||
|
assert(b == [ 1, 2, 3, 4, 7 ]);
|
||||||
|
assert(h.length == 0);
|
||||||
}
|
}
|
||||||
// {
|
|
||||||
// int[] a = [ 4, 1, 3, 2, 16, 9, 10, 14, 8, 7 ];
|
|
||||||
// BinaryHeap!(int[]) h = BinaryHeap!(int[])(a);
|
|
||||||
// //makeBinaryHeap!(binaryFun!("a < b"))(a);
|
|
||||||
// auto b = h.pop(5);
|
|
||||||
// assert(b == [ 8, 9, 10, 14, 16 ]);
|
|
||||||
// b = h.pop(5);
|
|
||||||
// assert(b == [ 1, 2, 3, 4, 7 ]);
|
|
||||||
// assert(h.length == 0);
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
||||||
version(none) unittest
|
|
||||||
{
|
|
||||||
// example from "Introduction to Algorithms" Cormen et al., p 143
|
|
||||||
int[] a = [ 16, 4, 10, 14, 7, 9, 3, 2, 8, 1 ];
|
|
||||||
BinaryHeap!(int[]) h = BinaryHeap!(int[])(a);
|
|
||||||
assert(a == [ 16, 14, 10, 8, 7, 9, 3, 2, 4, 1 ]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4565,30 +4630,12 @@ TRange topNCopy(alias less = "a < b", SRange, TRange)
|
||||||
if (isInputRange!(SRange) && isRandomAccessRange!(TRange)
|
if (isInputRange!(SRange) && isRandomAccessRange!(TRange)
|
||||||
&& hasLength!(TRange) && hasSlicing!(TRange))
|
&& hasLength!(TRange) && hasSlicing!(TRange))
|
||||||
{
|
{
|
||||||
// make an initial heap in the target
|
auto heap = BinaryHeap!(TRange, less)(target, 0);
|
||||||
foreach (i; 0 .. target.length)
|
foreach (e; source) heap.conditionalPut(e);
|
||||||
{
|
|
||||||
if (source.empty)
|
|
||||||
{
|
|
||||||
target = target[0 .. i];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
target[i] = source.front;
|
|
||||||
source.popFront;
|
|
||||||
}
|
|
||||||
if (target.empty) return target;
|
|
||||||
|
|
||||||
auto heap = BinaryHeap!(TRange, less)(target);
|
|
||||||
// now copy stuff into the target if it's smaller
|
|
||||||
for (; !source.empty; source.popFront)
|
|
||||||
{
|
|
||||||
if (!binaryFun!(less)(source.front, heap.top)) continue;
|
|
||||||
heap.replaceTop(source.front);
|
|
||||||
}
|
|
||||||
return sorted ? heap.pop(heap.length) : heap.release;
|
return sorted ? heap.pop(heap.length) : heap.release;
|
||||||
}
|
}
|
||||||
|
|
||||||
version(none) unittest
|
unittest
|
||||||
{
|
{
|
||||||
int[] a = [ 10, 16, 2, 3, 1, 5, 0 ];
|
int[] a = [ 10, 16, 2, 3, 1, 5, 0 ];
|
||||||
int[] b = new int[3];
|
int[] b = new int[3];
|
||||||
|
@ -4596,7 +4643,7 @@ version(none) unittest
|
||||||
assert(b == [ 0, 1, 2 ]);
|
assert(b == [ 0, 1, 2 ]);
|
||||||
}
|
}
|
||||||
|
|
||||||
version(none) unittest
|
unittest
|
||||||
{
|
{
|
||||||
auto r = Random(unpredictableSeed);
|
auto r = Random(unpredictableSeed);
|
||||||
int[] a = new int[uniform(1, 1000, r)];
|
int[] a = new int[uniform(1, 1000, r)];
|
||||||
|
@ -5094,6 +5141,115 @@ version(unittest)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// largestPartialIntersection
|
||||||
|
/**
|
||||||
|
Given a range of sorted forward ranges $(D ror), copies to $(D tgt)
|
||||||
|
the elements that are common to most ranges, along with their number
|
||||||
|
of occurrences. All ranges in $(D ror) are assumed to be sorted by $(D
|
||||||
|
less). Only the most frequent $(D tgt.length) elements are returned.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
----
|
||||||
|
// Figure which number can be found in most arrays of the set of
|
||||||
|
// arrays below.
|
||||||
|
double[][] a =
|
||||||
|
[
|
||||||
|
[ 1, 4, 7, 8 ],
|
||||||
|
[ 1, 7 ],
|
||||||
|
[ 1, 7, 8],
|
||||||
|
[ 4 ],
|
||||||
|
[ 7 ],
|
||||||
|
];
|
||||||
|
auto b = new Tuple!(double, uint)[1];
|
||||||
|
largestPartialIntersection(a, b);
|
||||||
|
// First member is the item, second is the occurrence count
|
||||||
|
assert(b[0] == tuple(7.0, 4u));
|
||||||
|
----
|
||||||
|
|
||||||
|
$(D 7.0) is the correct answer because it occurs in $(D 4) out of the
|
||||||
|
$(D 5) inputs, more than any other number. The second member of the
|
||||||
|
resulting tuple is indeed $(D 4) (recording the number of occurrences
|
||||||
|
of $(D 7.0)). If more of the top-frequent numbers are needed, just
|
||||||
|
create a larger $(D tgt) range. In the example above, creating $(D b)
|
||||||
|
with length $(D 2) yields $(D tuple(1.0, 3u)) in the second position.
|
||||||
|
|
||||||
|
The function $(D largestPartialIntersection) is useful for
|
||||||
|
e.g. searching an $(LUCKY inverted index) for the documents most
|
||||||
|
likely to contain some terms of interest. The complexity of the search
|
||||||
|
is $(BIGOH n * log(tgt.length)), where $(D n) is the sum of lengths of
|
||||||
|
all input ranges. This approach is faster than keeping an associative
|
||||||
|
array of the occurrences and then selecting its top items, and also
|
||||||
|
requires less memory ($(D largestPartialIntersection) builds its
|
||||||
|
result directly in $(D tgt) and requires no extra memory).
|
||||||
|
*/
|
||||||
|
void largestPartialIntersection
|
||||||
|
(alias less = "a < b", RangeOfRanges, Range)
|
||||||
|
(RangeOfRanges ror, Range tgt, bool sorted = false)
|
||||||
|
{
|
||||||
|
alias binaryFun!less comp;
|
||||||
|
alias ElementType!(ElementType!RangeOfRanges) E;
|
||||||
|
alias ElementType!Range InfoType;
|
||||||
|
bool heapComp(InfoType a, InfoType b)
|
||||||
|
{
|
||||||
|
return a.field[1] > b.field[1];
|
||||||
|
}
|
||||||
|
auto heap = BinaryHeap!(Range, heapComp)(tgt, 0);
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
auto tr = frontTransversal(ror);
|
||||||
|
if (tr.empty) break;
|
||||||
|
auto mc = minCount!less(tr);
|
||||||
|
//auto cm = tuple(mc.field[1], mc.field[0]);
|
||||||
|
// auto cnt = mc.field[1];
|
||||||
|
// writeln(min);
|
||||||
|
// writeln(cnt);
|
||||||
|
// Put that on the heap
|
||||||
|
heap.conditionalPut(mc);
|
||||||
|
// Now remove that minimum element from wherever it occurred
|
||||||
|
foreach (ref r; ror)
|
||||||
|
{
|
||||||
|
if (r.empty) continue;
|
||||||
|
if (!comp(r.front, mc.field[0])
|
||||||
|
&& !comp(mc.field[0], r.front))
|
||||||
|
r.popFront;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sorted) heap.pop(heap.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
double[][] a =
|
||||||
|
[
|
||||||
|
[ 1, 4, 7, 8 ],
|
||||||
|
[ 1, 7 ],
|
||||||
|
[ 1, 7, 8],
|
||||||
|
[ 4 ],
|
||||||
|
[ 7 ],
|
||||||
|
];
|
||||||
|
auto b = new Tuple!(double, uint)[2];
|
||||||
|
largestPartialIntersection(a, b, true);
|
||||||
|
//sort(b);
|
||||||
|
//writeln(b);
|
||||||
|
assert(b[0] == [ tuple(7., 4u), tuple(1., 3u) ][], text(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
string[][] a =
|
||||||
|
[
|
||||||
|
[ "1", "4", "7", "8" ],
|
||||||
|
[ "1", "7" ],
|
||||||
|
[ "1", "7", "8"],
|
||||||
|
[ "4" ],
|
||||||
|
[ "7" ],
|
||||||
|
];
|
||||||
|
auto b = new Tuple!(string, uint)[2];
|
||||||
|
largestPartialIntersection(a, b, true);
|
||||||
|
//writeln(b);
|
||||||
|
assert(b == [ tuple("7", 4u), tuple("1", 3u) ][], text(b));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
|
* Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
|
||||||
* Written by Andrei Alexandrescu, www.erdani.org
|
* Written by Andrei Alexandrescu, www.erdani.org
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue