Improve speed of find for random access needles (strings)

When finding a string within a string, std.algorithm.searching.find was
unnecessarily slow. The reason is that it created intermediate slices. A
naively written nested-for-loop implementation was a few times
faster.

For random access ranges (which strings are), this uses an index-based
algorithm, which does not need to create an intermediate slice. Speed
is now comparable to the nested-for-loop implementation even in rather
pathological cases.

This might help with issue 9646.
This commit is contained in:
Andreas Zwinkau 2016-05-25 13:12:51 +02:00 committed by Andreas Zwinkau
parent e216c10b2c
commit a9d5b8ca77

View file

@ -1773,11 +1773,51 @@ if (isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && isBidirectionalRa
&& is(typeof(binaryFun!pred(haystack.front, needle.front)) : bool))
{
if (needle.empty) return haystack;
const needleLength = walkLength(needle.save);
static if (hasLength!R2)
{
immutable needleLength = needle.length;
}
else
{
immutable needleLength = walkLength(needle.save);
}
if (needleLength > haystack.length)
{
return haystack[haystack.length .. haystack.length];
}
static if (isRandomAccessRange!R2)
{
immutable lastIndex = needleLength - 1;
auto last = needle[lastIndex];
size_t j = lastIndex, skip = 0;
for (; j < haystack.length;)
{
if (!binaryFun!pred(haystack[j], last))
{
++j;
continue;
}
immutable k = j - lastIndex;
// last elements match
for (size_t i = 0;; ++i)
{
if (i == lastIndex)
return haystack[k .. haystack.length];
if (!binaryFun!pred(haystack[k + i], needle[i]))
break;
}
if (skip == 0) {
skip = 1;
while (skip < needleLength && needle[needleLength - 1 - skip] != needle[needleLength - 1])
{
++skip;
}
}
j += skip;
}
}
else
{
// @@@BUG@@@
// auto needleBack = moveBack(needle);
// Stage 1: find the step
@ -1793,9 +1833,7 @@ if (isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && isBidirectionalRa
for (;;)
{
if (scout >= haystack.length)
{
return haystack[haystack.length .. haystack.length];
}
break;
if (!binaryFun!pred(haystack[scout], needleBack))
{
++scout;
@ -1803,14 +1841,17 @@ if (isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && isBidirectionalRa
}
// Found a match with the last element in the needle
auto cand = haystack[scout + 1 - needleLength .. haystack.length];
// This intermediate creation of a slice is why the
// random access variant above is faster.
if (startsWith!pred(cand, needle))
{
// found
return cand;
}
// Continue with the stride
scout += step;
}
}
return haystack[haystack.length .. haystack.length];
}
@safe unittest