phobos/internal/arraydouble.d
2008-07-08 20:53:16 +00:00

282 lines
5.1 KiB
D

/***************************
* D programming language http://www.digitalmars.com/d/
* Runtime support for double array operations.
* Based on code originally written by Burton Radons.
* Placed in public domain.
*/
import std.cpuid;
bool disjoint(T)(T[] a, T[] b)
{
return (a.ptr + a.length <= b.ptr || b.ptr + b.length <= a.ptr);
}
alias double T;
extern (C):
/* ======================================================================== */
/***********************
* Computes:
* a[] = b[] + c[]
*/
T[] _adAssAddDouble(T[] a, T[] c, T[] b)
in
{
assert(a.length == b.length && b.length == c.length);
assert(disjoint(a, b));
assert(disjoint(a, c));
assert(disjoint(b, c));
}
body
{
version (D_InlineAsm_X86)
{
auto aptr = a.ptr;
auto aend = aptr + a.length;
auto bptr = b.ptr;
auto cptr = c.ptr;
T* n;
// SSE version is 333% faster
if (std.cpuid.sse2() && b.length >= 16)
{
n = aptr + (b.length & ~15);
// Unaligned case
asm
{
mov EAX, bptr; // left operand
mov ECX, cptr; // right operand
mov ESI, aptr; // destination operand
mov EDI, n; // end comparison
align 8;
startsseloopb:
movupd XMM0, [EAX];
movupd XMM1, [EAX+16];
movupd XMM2, [EAX+32];
movupd XMM3, [EAX+48];
add EAX, 64;
movupd XMM4, [ECX];
movupd XMM5, [ECX+16];
movupd XMM6, [ECX+32];
movupd XMM7, [ECX+48];
add ESI, 64;
addpd XMM0, XMM4;
addpd XMM1, XMM5;
addpd XMM2, XMM6;
addpd XMM3, XMM7;
add ECX, 64;
movupd [ESI+ 0-64], XMM0;
movupd [ESI+16-64], XMM1;
movupd [ESI+32-64], XMM2;
movupd [ESI+48-64], XMM3;
cmp ESI, EDI;
jb startsseloopb;
mov aptr, ESI;
mov bptr, EAX;
mov cptr, ECX;
}
}
// Handle remainder
while (aptr < aend)
*aptr++ = *bptr++ + *cptr++;
}
else
{
for (int i = 0; i < a.length; i++)
a[i] = b[i] + c[i];
}
return a;
}
unittest
{
printf("_adAssAddDouble unittest\n");
{
T[] a = [1, 2, 3];
T[] b = [4, 5, 6];
T[3] c;
c[] = a[] + b[];
assert(c[0] == 5);
assert(c[1] == 7);
assert(c[2] == 9);
}
{
T[] a = [1, 2, 3, 4, 5, 6, 7, 8, 9];
T[] b = [4, 5, 6, 7, 8, 9, 10, 11, 12];
T[9] c;
c[] = a[] + b[];
assert(c[0] == 5);
assert(c[1] == 7);
assert(c[2] == 9);
assert(c[3] == 11);
assert(c[4] == 13);
assert(c[5] == 15);
assert(c[6] == 17);
assert(c[7] == 19);
assert(c[8] == 21);
}
{
const int dim = 35;
T[dim] a;
T[dim] b;
T[dim] c;
for (int i = 0; i < dim; i++)
{ a[i] = i;
b[i] = i + 7;
c[i] = i * 2;
}
c[] = a[] + b[];
for (int i = 0; i < dim; i++)
{
assert(c[i] == a[i] + b[i]);
}
}
}
/* ======================================================================== */
/***********************
* Computes:
* a[] = b[] - c[]
*/
T[] _adAssMinDouble(T[] a, T[] c, T[] b)
in
{
assert(a.length == b.length && b.length == c.length);
assert(disjoint(a, b));
assert(disjoint(a, c));
assert(disjoint(b, c));
}
body
{
version (D_InlineAsm_X86)
{
auto aptr = a.ptr;
auto aend = aptr + a.length;
auto bptr = b.ptr;
auto cptr = c.ptr;
T* n;
// SSE version is 324% faster
if (std.cpuid.sse2() && b.length >= 8)
{
n = aptr + (b.length & ~7);
// Unaligned case
asm
{
mov EAX, bptr; // left operand
mov ECX, cptr; // right operand
mov ESI, aptr; // destination operand
mov EDI, n; // end comparison
align 8;
startsseloopb:
movupd XMM0, [EAX];
movupd XMM1, [EAX+16];
movupd XMM2, [EAX+32];
movupd XMM3, [EAX+48];
add EAX, 64;
movupd XMM4, [ECX];
movupd XMM5, [ECX+16];
movupd XMM6, [ECX+32];
movupd XMM7, [ECX+48];
add ESI, 64;
subpd XMM0, XMM4;
subpd XMM1, XMM5;
subpd XMM2, XMM6;
subpd XMM3, XMM7;
add ECX, 64;
movupd [ESI+ 0-64], XMM0;
movupd [ESI+16-64], XMM1;
movupd [ESI+32-64], XMM2;
movupd [ESI+48-64], XMM3;
cmp ESI, EDI;
jb startsseloopb;
mov aptr, ESI;
mov bptr, EAX;
mov cptr, ECX;
}
}
// Handle remainder
while (aptr < aend)
*aptr++ = *bptr++ - *cptr++;
}
else
{
for (int i = 0; i < a.length; i++)
a[i] = b[i] - c[i];
}
return a;
}
unittest
{
printf("_adAssMinDouble unittest\n");
{
T[] a = [1, 2, 3];
T[] b = [4, 5, 6];
T[3] c;
c[] = a[] - b[];
for (int i = 0; i < c.length; i++)
{
assert(c[i] == a[i] - b[i]);
}
}
{
T[] a = [1, 2, 3, 4, 5, 6, 7, 8, 9];
T[] b = [4, 5, 6, 7, 8, 9, 10, 11, 12];
T[9] c;
c[] = a[] - b[];
for (int i = 0; i < c.length; i++)
{
assert(c[i] == a[i] - b[i]);
}
}
{
const int dim = 35;
T[dim] a;
T[dim] b;
T[dim] c;
for (int i = 0; i < dim; i++)
{ a[i] = i;
b[i] = i + 7;
c[i] = i * 2;
}
c[] = a[] - b[];
for (int i = 0; i < dim; i++)
{
assert(c[i] == a[i] - b[i]);
}
}
}