mirror of
https://github.com/dlang/phobos.git
synced 2025-05-10 22:18:03 +03:00
improved unit tests, added T[]+=T[]*T
This commit is contained in:
parent
18e0aa5ae0
commit
6352d7dc0d
3 changed files with 398 additions and 116 deletions
|
@ -14,9 +14,9 @@ version (unittest)
|
|||
*/
|
||||
int cpuid;
|
||||
const int CPUID_MAX = 5;
|
||||
bool mmx() { return cpuid == 1; }
|
||||
bool sse() { return cpuid == 2; }
|
||||
bool sse2() { return cpuid == 3; }
|
||||
bool mmx() { return cpuid == 1 && std.cpuid.mmx(); }
|
||||
bool sse() { return cpuid == 2 && std.cpuid.sse(); }
|
||||
bool sse2() { return cpuid == 3 && std.cpuid.sse2(); }
|
||||
bool amd3dnow() { return cpuid == 4 && std.cpuid.amd3dnow(); }
|
||||
}
|
||||
else
|
||||
|
@ -130,8 +130,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -241,8 +243,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -344,8 +348,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -385,14 +391,10 @@ T[] _arrayExpSliceAddass_d(T[] a, T value)
|
|||
// SSE2 version is 114% faster
|
||||
if (sse2() && a.length >= 8)
|
||||
{
|
||||
// align pointer
|
||||
auto n = cast(T*)((cast(uint)aptr + 7) & ~7);
|
||||
while (aptr < n)
|
||||
*aptr++ += value;
|
||||
n = cast(T*)((cast(uint)aend) & ~7);
|
||||
auto n = cast(T*)((cast(uint)aend) & ~7);
|
||||
if (aptr < n)
|
||||
|
||||
// Aligned case
|
||||
// Unaligned case
|
||||
asm
|
||||
{
|
||||
mov ESI, aptr;
|
||||
|
@ -402,19 +404,19 @@ T[] _arrayExpSliceAddass_d(T[] a, T value)
|
|||
|
||||
align 8;
|
||||
startsseloopa:
|
||||
movapd XMM0, [ESI];
|
||||
movapd XMM1, [ESI+16];
|
||||
movapd XMM2, [ESI+32];
|
||||
movapd XMM3, [ESI+48];
|
||||
movupd XMM0, [ESI];
|
||||
movupd XMM1, [ESI+16];
|
||||
movupd XMM2, [ESI+32];
|
||||
movupd XMM3, [ESI+48];
|
||||
add ESI, 64;
|
||||
addpd XMM0, XMM4;
|
||||
addpd XMM1, XMM4;
|
||||
addpd XMM2, XMM4;
|
||||
addpd XMM3, XMM4;
|
||||
movapd [ESI+ 0-64], XMM0;
|
||||
movapd [ESI+16-64], XMM1;
|
||||
movapd [ESI+32-64], XMM2;
|
||||
movapd [ESI+48-64], XMM3;
|
||||
movupd [ESI+ 0-64], XMM0;
|
||||
movupd [ESI+16-64], XMM1;
|
||||
movupd [ESI+32-64], XMM2;
|
||||
movupd [ESI+48-64], XMM3;
|
||||
cmp ESI, EDI;
|
||||
jb startsseloopa;
|
||||
|
||||
|
@ -441,8 +443,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -546,8 +550,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -649,8 +655,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -755,8 +763,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -796,14 +806,10 @@ T[] _arrayExpSliceMinass_d(T[] a, T value)
|
|||
// SSE2 version is 115% faster
|
||||
if (sse2() && a.length >= 8)
|
||||
{
|
||||
// align pointer
|
||||
auto n = cast(T*)((cast(uint)aptr + 7) & ~7);
|
||||
while (aptr < n)
|
||||
*aptr++ -= value;
|
||||
n = cast(T*)((cast(uint)aend) & ~7);
|
||||
auto n = cast(T*)((cast(uint)aend) & ~7);
|
||||
if (aptr < n)
|
||||
|
||||
// Aligned case
|
||||
// Unaligned case
|
||||
asm
|
||||
{
|
||||
mov ESI, aptr;
|
||||
|
@ -813,19 +819,19 @@ T[] _arrayExpSliceMinass_d(T[] a, T value)
|
|||
|
||||
align 8;
|
||||
startsseloopa:
|
||||
movapd XMM0, [ESI];
|
||||
movapd XMM1, [ESI+16];
|
||||
movapd XMM2, [ESI+32];
|
||||
movapd XMM3, [ESI+48];
|
||||
movupd XMM0, [ESI];
|
||||
movupd XMM1, [ESI+16];
|
||||
movupd XMM2, [ESI+32];
|
||||
movupd XMM3, [ESI+48];
|
||||
add ESI, 64;
|
||||
subpd XMM0, XMM4;
|
||||
subpd XMM1, XMM4;
|
||||
subpd XMM2, XMM4;
|
||||
subpd XMM3, XMM4;
|
||||
movapd [ESI+ 0-64], XMM0;
|
||||
movapd [ESI+16-64], XMM1;
|
||||
movapd [ESI+32-64], XMM2;
|
||||
movapd [ESI+48-64], XMM3;
|
||||
movupd [ESI+ 0-64], XMM0;
|
||||
movupd [ESI+16-64], XMM1;
|
||||
movupd [ESI+32-64], XMM2;
|
||||
movupd [ESI+48-64], XMM3;
|
||||
cmp ESI, EDI;
|
||||
jb startsseloopa;
|
||||
|
||||
|
@ -842,7 +848,7 @@ T[] _arrayExpSliceMinass_d(T[] a, T value)
|
|||
|
||||
unittest
|
||||
{
|
||||
printf("_arrayExpSliceminass_d unittest\n");
|
||||
printf("_arrayExpSliceMinass_d unittest\n");
|
||||
for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
|
||||
{
|
||||
version (log) printf(" cpuid %d\n", cpuid);
|
||||
|
@ -852,8 +858,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -957,8 +965,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1060,8 +1070,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1170,8 +1182,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1211,14 +1225,10 @@ T[] _arrayExpSliceMulass_d(T[] a, T value)
|
|||
// SSE2 version is 109% faster
|
||||
if (sse2() && a.length >= 8)
|
||||
{
|
||||
// align pointer
|
||||
auto n = cast(T*)((cast(uint)aptr + 7) & ~7);
|
||||
while (aptr < n)
|
||||
*aptr++ *= value;
|
||||
n = cast(T*)((cast(uint)aend) & ~7);
|
||||
auto n = cast(T*)((cast(uint)aend) & ~7);
|
||||
if (aptr < n)
|
||||
|
||||
// Aligned case
|
||||
// Unaligned case
|
||||
asm
|
||||
{
|
||||
mov ESI, aptr;
|
||||
|
@ -1228,19 +1238,19 @@ T[] _arrayExpSliceMulass_d(T[] a, T value)
|
|||
|
||||
align 8;
|
||||
startsseloopa:
|
||||
movapd XMM0, [ESI];
|
||||
movapd XMM1, [ESI+16];
|
||||
movapd XMM2, [ESI+32];
|
||||
movapd XMM3, [ESI+48];
|
||||
movupd XMM0, [ESI];
|
||||
movupd XMM1, [ESI+16];
|
||||
movupd XMM2, [ESI+32];
|
||||
movupd XMM3, [ESI+48];
|
||||
add ESI, 64;
|
||||
mulpd XMM0, XMM4;
|
||||
mulpd XMM1, XMM4;
|
||||
mulpd XMM2, XMM4;
|
||||
mulpd XMM3, XMM4;
|
||||
movapd [ESI+ 0-64], XMM0;
|
||||
movapd [ESI+16-64], XMM1;
|
||||
movapd [ESI+32-64], XMM2;
|
||||
movapd [ESI+48-64], XMM3;
|
||||
movupd [ESI+ 0-64], XMM0;
|
||||
movupd [ESI+16-64], XMM1;
|
||||
movupd [ESI+32-64], XMM2;
|
||||
movupd [ESI+48-64], XMM3;
|
||||
cmp ESI, EDI;
|
||||
jb startsseloopa;
|
||||
|
||||
|
@ -1267,8 +1277,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1372,8 +1384,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1489,8 +1503,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1594,8 +1610,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1619,3 +1637,79 @@ unittest
|
|||
}
|
||||
|
||||
|
||||
/* ======================================================================== */
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] -= b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[] -= b[] * value by forwarding to _arraySliceExpMulSliceAddass_d
// with the scalar negated, and returns that routine's result.
// This wrapper declares no contract of its own.
T[] _arraySliceExpMulSliceMinass_d(T[] a, T value, T[] b)
|
||||
{
|
||||
// a -= b * value is evaluated as a += b * (-value).
return _arraySliceExpMulSliceAddass_d(a, -value, b);
|
||||
}
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] += b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[i] += b[i] * value for every element of a, in place, and
// returns a.
// The in-contract asserts that a and b have equal length and that
// disjoint(a, b) holds (disjoint is defined outside this view; presumably it
// means the two slices do not overlap -- confirm).
// T is an alias declared elsewhere in the module.
// As written, this routine has no asm/SIMD path: it never consults
// mmx()/sse()/sse2(), and the single scalar loop processes the whole array.
T[] _arraySliceExpMulSliceAddass_d(T[] a, T value, T[] b)
|
||||
in
|
||||
{
|
||||
assert(a.length == b.length);
|
||||
assert(disjoint(a, b));
|
||||
}
|
||||
body
|
||||
{
|
||||
auto aptr = a.ptr;
|
||||
// One past the last element of a; loop terminator.
auto aend = aptr + a.length;
|
||||
auto bptr = b.ptr;
|
||||
|
||||
// Handle remainder -- with no vectorized loop ahead of it, the "remainder"
// here is every element of the array.
|
||||
while (aptr < aend)
|
||||
*aptr++ += *bptr++ * value;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// Checks the array expression c[] += a[] * 6 against an element-wise
// recomputation. The expression is presumably lowered by the compiler to
// _arraySliceExpMulSliceAddass_d -- confirm; the routine is not called by name.
unittest
|
||||
{
|
||||
printf("_arraySliceExpMulSliceAddass_d unittest\n");
|
||||
|
||||
// cpuid is pinned to 1 rather than swept over 0 .. CPUID_MAX.
cpuid = 1;
|
||||
{
|
||||
version (log) printf(" cpuid %d\n", cpuid);
|
||||
|
||||
// NOTE: the bound is j < 1, so only j == 0 runs. The slices below are then
// [0 .. dim], and the "misalign for second iteration" case is never exercised.
for (int j = 0; j < 1; j++)
|
||||
{
|
||||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
// Fill the inputs; b's values are overwritten by the copy from c below.
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
b[i] = cast(T)(i + 7);
|
||||
c[i] = cast(T)(i * 2);
|
||||
}
|
||||
|
||||
// Save the original contents of c in b so the result can be verified.
b[] = c[];
|
||||
// Operation under test.
c[] += a[] * 6;
|
||||
|
||||
// Verify each element: new c must equal old c (held in b) plus a * 6.
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
//printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]);
|
||||
if (c[i] != cast(T)(b[i] + a[i] * 6))
|
||||
{
|
||||
// Report the mismatching element before failing.
printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]);
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@ version (unittest)
|
|||
*/
|
||||
int cpuid;
|
||||
const int CPUID_MAX = 5;
|
||||
bool mmx() { return cpuid == 1; }
|
||||
bool sse() { return cpuid == 2; }
|
||||
bool sse2() { return cpuid == 3; }
|
||||
bool mmx() { return cpuid == 1 && std.cpuid.mmx(); }
|
||||
bool sse() { return cpuid == 2 && std.cpuid.sse(); }
|
||||
bool sse2() { return cpuid == 3 && std.cpuid.sse2(); }
|
||||
bool amd3dnow() { return cpuid == 4 && std.cpuid.amd3dnow(); }
|
||||
}
|
||||
else
|
||||
|
@ -170,8 +170,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -320,8 +322,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -462,8 +466,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -596,8 +602,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -737,8 +745,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -881,8 +891,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1027,8 +1039,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1161,8 +1175,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1302,8 +1318,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1445,8 +1463,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1594,8 +1614,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1728,8 +1750,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -1869,8 +1893,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -2025,8 +2051,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -2171,8 +2199,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -2196,3 +2226,80 @@ unittest
|
|||
}
|
||||
|
||||
|
||||
/* ======================================================================== */
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] -= b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[] -= b[] * value by forwarding to _arraySliceExpMulSliceAddass_f
// with the scalar negated, and returns that routine's result.
// This wrapper declares no contract of its own.
T[] _arraySliceExpMulSliceMinass_f(T[] a, T value, T[] b)
|
||||
{
|
||||
// a -= b * value is evaluated as a += b * (-value).
return _arraySliceExpMulSliceAddass_f(a, -value, b);
|
||||
}
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] += b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[i] += b[i] * value for every element of a, in place, and
// returns a.
// The in-contract asserts that a and b have equal length and that
// disjoint(a, b) holds (disjoint is defined outside this view; presumably it
// means the two slices do not overlap -- confirm).
// T is an alias declared elsewhere in the module.
// As written, this routine has no asm/SIMD path: it never consults
// mmx()/sse()/sse2(), and the single scalar loop processes the whole array.
T[] _arraySliceExpMulSliceAddass_f(T[] a, T value, T[] b)
|
||||
in
|
||||
{
|
||||
assert(a.length == b.length);
|
||||
assert(disjoint(a, b));
|
||||
}
|
||||
body
|
||||
{
|
||||
auto aptr = a.ptr;
|
||||
// One past the last element of a; loop terminator.
auto aend = aptr + a.length;
|
||||
auto bptr = b.ptr;
|
||||
|
||||
// Handle remainder -- with no vectorized loop ahead of it, the "remainder"
// here is every element of the array.
|
||||
while (aptr < aend)
|
||||
*aptr++ += *bptr++ * value;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// Checks the array expression c[] += a[] * 6 against an element-wise
// recomputation. The expression is presumably lowered by the compiler to
// _arraySliceExpMulSliceAddass_f -- confirm; the routine is not called by name.
unittest
|
||||
{
|
||||
printf("_arraySliceExpMulSliceAddass_f unittest\n");
|
||||
|
||||
// cpuid is pinned to 1; no sweep over cpuid values is performed here.
cpuid = 1;
|
||||
{
|
||||
version (log) printf(" cpuid %d\n", cpuid);
|
||||
|
||||
// NOTE: the bound is j < 1, so only j == 0 runs. The slices below are then
// [0 .. dim], and the "misalign for second iteration" case is never exercised.
for (int j = 0; j < 1; j++)
|
||||
{
|
||||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
// Fill the inputs; b's values are overwritten by the copy from c below.
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
b[i] = cast(T)(i + 7);
|
||||
c[i] = cast(T)(i * 2);
|
||||
}
|
||||
|
||||
// Save the original contents of c in b so the result can be verified.
b[] = c[];
|
||||
// Operation under test.
c[] += a[] * 6;
|
||||
|
||||
// Verify each element: new c must equal old c (held in b) plus a * 6.
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
//printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]);
|
||||
if (c[i] != cast(T)(b[i] + a[i] * 6))
|
||||
{
|
||||
// Report the mismatching element before failing.
printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]);
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ version (unittest)
|
|||
*/
|
||||
int cpuid;
|
||||
const int CPUID_MAX = 1;
|
||||
bool mmx() { return cpuid == 1; }
|
||||
bool sse() { return cpuid == 2; }
|
||||
bool sse2() { return cpuid == 3; }
|
||||
bool mmx() { return cpuid == 1 && std.cpuid.mmx(); }
|
||||
bool sse() { return cpuid == 2 && std.cpuid.sse(); }
|
||||
bool sse2() { return cpuid == 3 && std.cpuid.sse2(); }
|
||||
bool amd3dnow() { return cpuid == 4 && std.cpuid.amd3dnow(); }
|
||||
}
|
||||
else
|
||||
|
@ -72,8 +72,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -130,8 +132,10 @@ unittest
|
|||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim];
|
||||
T[] c = new T[dim];
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
|
@ -153,3 +157,80 @@ unittest
|
|||
}
|
||||
}
|
||||
|
||||
/* ======================================================================== */
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] -= b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[] -= b[] * value by forwarding to _arraySliceExpMulSliceAddass_r
// with the scalar negated, and returns that routine's result.
// This wrapper declares no contract of its own.
T[] _arraySliceExpMulSliceMinass_r(T[] a, T value, T[] b)
|
||||
{
|
||||
// a -= b * value is evaluated as a += b * (-value).
return _arraySliceExpMulSliceAddass_r(a, -value, b);
|
||||
}
|
||||
|
||||
/***********************
|
||||
* Computes:
|
||||
* a[] += b[] * value
|
||||
*/
|
||||
|
||||
// Computes a[i] += b[i] * value for every element of a, in place, and
// returns a.
// The in-contract asserts that a and b have equal length and that
// disjoint(a, b) holds (disjoint is defined outside this view; presumably it
// means the two slices do not overlap -- confirm).
// T is an alias declared elsewhere in the module.
// As written, this routine has no asm/SIMD path: it never consults
// mmx()/sse()/sse2(), and the single scalar loop processes the whole array.
T[] _arraySliceExpMulSliceAddass_r(T[] a, T value, T[] b)
|
||||
in
|
||||
{
|
||||
assert(a.length == b.length);
|
||||
assert(disjoint(a, b));
|
||||
}
|
||||
body
|
||||
{
|
||||
auto aptr = a.ptr;
|
||||
// One past the last element of a; loop terminator.
auto aend = aptr + a.length;
|
||||
auto bptr = b.ptr;
|
||||
|
||||
// Handle remainder -- with no vectorized loop ahead of it, the "remainder"
// here is every element of the array.
|
||||
while (aptr < aend)
|
||||
*aptr++ += *bptr++ * value;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// Checks the array expression c[] += a[] * 6 against an element-wise
// recomputation. The expression is presumably lowered by the compiler to
// _arraySliceExpMulSliceAddass_r -- confirm; the routine is not called by name.
unittest
|
||||
{
|
||||
printf("_arraySliceExpMulSliceAddass_r unittest\n");
|
||||
|
||||
// cpuid is pinned to 1; no sweep over cpuid values is performed here.
cpuid = 1;
|
||||
{
|
||||
version (log) printf(" cpuid %d\n", cpuid);
|
||||
|
||||
// NOTE: the bound is j < 1, so only j == 0 runs. The slices below are then
// [0 .. dim], and the "misalign for second iteration" case is never exercised.
for (int j = 0; j < 1; j++)
|
||||
{
|
||||
const int dim = 67;
|
||||
T[] a = new T[dim + j]; // aligned on 16 byte boundary
|
||||
a = a[j .. dim + j]; // misalign for second iteration
|
||||
T[] b = new T[dim + j];
|
||||
b = b[j .. dim + j];
|
||||
T[] c = new T[dim + j];
|
||||
c = c[j .. dim + j];
|
||||
|
||||
// Fill the inputs; b's values are overwritten by the copy from c below.
for (int i = 0; i < dim; i++)
|
||||
{ a[i] = cast(T)i;
|
||||
b[i] = cast(T)(i + 7);
|
||||
c[i] = cast(T)(i * 2);
|
||||
}
|
||||
|
||||
// Save the original contents of c in b so the result can be verified.
b[] = c[];
|
||||
// Operation under test.
c[] += a[] * 6;
|
||||
|
||||
// Verify each element: new c must equal old c (held in b) plus a * 6.
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
//printf("[%d]: %Lg ?= %Lg + %Lg * 6\n", i, c[i], b[i], a[i]);
|
||||
if (c[i] != cast(T)(b[i] + a[i] * 6))
|
||||
{
|
||||
// Report the mismatching element before failing.
printf("[%d]: %Lg ?= %Lg + %Lg * 6\n", i, c[i], b[i], a[i]);
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue