dmd/src/ddmd/backend/divcoeff.c

317 lines
7.7 KiB
C

/**
* Compiler implementation of the
* $(LINK2 http://www.dlang.org, D programming language).
*
* Copyright: Copyright (c) 2013-2017 by The D Language Foundation, All Rights Reserved
* Authors: $(LINK2 http://www.digitalmars.com, Walter Bright)
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/ddmd/backend/divcoeff.c, backend/divcoeff.c)
*/
/***************************************************
* Algorithms from "Division by Invariant Integers using Multiplication"
* by Torbjoern Granlund and Peter L. Montgomery
*/
#include <stdio.h>
#include <assert.h>
// This MUST MATCH typedef targ_ullong in cdef.h.
#if defined(__UINT64_TYPE__) && !defined(__APPLE__)
typedef __UINT64_TYPE__ ullong;
#elif defined(__UINTMAX_TYPE__)
typedef __UINTMAX_TYPE__ ullong;
#else
typedef unsigned long long ullong;
#endif
void test_udiv_coefficients();
/* unsigned 128 bit math
*/
#define SIGN64(x) ((long long)(x) < 0)
#define SHL128(dh,dl, xh,xl) ((dh = (xh << 1) | SIGN64(xl)), (dl = xl << 1))
#define SHR128(dh,dl, xh,xl) ((dl = (xl >> 1) | ((xh & 1) << 63)),(dh = xh >> 1))
#define XltY128(xh,xl,yh,yl) (xh < yh || (xh == yh && xl < yl))
void u128Div(ullong xh, ullong xl, ullong yh, ullong yl, ullong *pqh, ullong *pql)
{
/* Use auld skool shift & subtract algorithm.
* Not very efficient.
*/
//ullong xxh = xh, xxl = xl, yyh = yh, yyl = yl;
assert(yh || yl); // no div-by-0 bugs
// left justify y
unsigned shiftcount = 1;
if (!yh)
{ yh = yl;
yl = 0;
shiftcount += 64;
}
while (!SIGN64(yh))
{
SHL128(yh,yl, yh,yl);
shiftcount += 1;
}
ullong qh = 0;
ullong ql = 0;
do
{
SHL128(qh,ql, qh,ql);
if (XltY128(yh,yl,xh,xl))
{
// x -= y;
if (xl < yl)
{ xl -= yl;
xh -= yh + 1;
}
else
{ xl -= yl;
xh -= yh;
}
ql |= 1;
}
SHR128(yh,yl, yh,yl);
} while (--shiftcount);
*pqh = qh;
*pql = ql;
// Remainder is xh,xl
#if 0
printf("%016llx_%016llx / %016llx_%016llx = %016llx_%016llx\n", xxh,xxl,yyh,yyl,qh,ql);
if (xxh == 0 && yyh == 0)
printf("should be %llx\n", xxl / yyl);
#endif
}
/************************************
* Implement Algorithm 6.2: Selection of multiplier and shift count
* Input:
* N 32 or 64
* d divisor (must not be 0 or a power of 2)
* prec bits of precision desired
* Output:
* *pm factor
* *pshpost post shift
* Returns:
* true m >= 2**N
*/
bool choose_multiplier(int N, ullong d, int prec, ullong *pm, int *pshpost)
{
assert(N == 32 || N == 64);
assert(prec <= N);
assert(d > 1 && (d & (d - 1)));
// Compute b such that 2**(b-1) < d <= 2**b
// which is the number of significant bits in d
int b = 0;
ullong d1 = d;
while (d1)
{
++b;
d1 >>= 1;
}
int shpost = b;
bool mhighbit = false;
if (N == 32)
{
// mlow = (2**(N + b)) / d
ullong mlow = (1ULL << (N + b)) / d;
// uhigh = (2**(N + b) + 2**(N + b - prec)) / d
ullong mhigh = ((1ULL << (N + b)) + (1ULL << (N + b - prec))) / d;
while (mlow/2 < mhigh/2 && shpost)
{
mlow /= 2;
mhigh /= 2;
--shpost;
}
*pm = mhigh & 0xFFFFFFFF;
mhighbit = mhigh >> N;
}
else if (N == 64)
{
// Same as for N==32, but use 128 bit unsigned arithmetic
// mlow = (2**(N + b)) / d
ullong mlowl = 0;
ullong mlowh = 1ULL << b;
// mlow /= d
u128Div(mlowh, mlowl, 0, d, &mlowh, &mlowl);
// mhigh = (2**(N + b) + 2**(N + b - prec)) / d
ullong mhighl = 0;
ullong mhighh = 1ULL << b;
int e = N + b - prec;
if (e < 64)
mhighl = 1ULL << e;
else
mhighh |= 1ULL << (e - 64);
// mhigh /= d
u128Div(mhighh, mhighl, 0, d, &mhighh, &mhighl);
while (1)
{
// mlowb = mlow / 2
ullong mlowbh,mlowbl;
SHR128(mlowbh,mlowbl, mlowh,mlowl);
// mhighb = mhigh / 2
ullong mhighbh,mhighbl;
SHR128(mhighbh,mhighbl, mhighh,mhighl);
// if (mlowb < mhighb && shpost)
if (XltY128(mlowbh,mlowbl, mhighbh,mhighbl) && shpost)
{
// mlow = mlowb
mlowl = mlowbl;
mlowh = mlowbh;
// mhigh = mhighb
mhighl = mhighbl;
mhighh = mhighbh;
--shpost;
}
else
break;
}
*pm = mhighl;
mhighbit = mhighh & 1;
}
else
assert(0);
*pshpost = shpost;
return mhighbit;
}
/*************************************
* Find coefficients for Algorithm 4.2:
* Optimized code generation of unsigned q=n/d for constant nonzero d
* Input:
* N 32 or 64 (width of divide)
* d divisor (not a power of 2)
* Output:
* *pshpre pre-shift
* *pm factor
* *pshpost post-shift
* Returns:
* true Use algorithm:
* t1 = MULUH(m, n)
* q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
*
* false Use algorithm:
* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
*/
bool udiv_coefficients(int N, ullong d, int *pshpre, ullong *pm, int *pshpost)
{
#ifdef DEBUG
test_udiv_coefficients();
#endif
bool mhighbit = choose_multiplier(N, d, N, pm, pshpost);
if (mhighbit && (d & 1) == 0)
{
int e = 0;
while ((d & 1) == 0)
{ ++e;
d >>= 1;
}
*pshpre = e;
mhighbit = choose_multiplier(N, d, N - e, pm, pshpost);
assert(mhighbit == false);
}
else
*pshpre = 0;
return mhighbit;
}
#ifdef DEBUG
void test_udiv_coefficients()
{
static bool tested = false;
if (tested)
return;
tested = true;
struct S
{
int N;
ullong d;
int shpre;
int highbit;
ullong m;
int shpost;
};
static S table[] =
{
{ 32, 10, 0, 0, 0xCCCCCCCD, 3 },
{ 32, 13, 0, 0, 0x4EC4EC4F, 2 },
{ 32, 14, 1, 0, 0x92492493, 2 },
{ 32, 15, 0, 0, 0x88888889, 3 },
{ 32, 17, 0, 0, 0xF0F0F0F1, 4 },
{ 32, 14007, 0, 1, 0x2B71840D, 14 },
{ 64, 7, 0, 1, 0x2492492492492493, 3 },
{ 64, 10, 0, 0, 0xCCCCCCCCCCCCCCCD, 3 },
{ 64, 13, 0, 0, 0x4EC4EC4EC4EC4EC5, 2 },
{ 64, 14, 1, 0, 0x4924924924924925, 1 },
{ 64, 15, 0, 0, 0x8888888888888889, 3 },
{ 64, 17, 0, 0, 0xF0F0F0F0F0F0F0F1, 4 },
{ 64, 100, 2, 0, 0x28F5C28F5C28F5C3, 2 },
{ 64, 14007, 0, 1, 0x2B71840C5ADF02C3, 14 },
};
for (int i = 0; i < sizeof(table)/sizeof(table[0]); i++)
{ S *ps = &table[i];
ullong m;
int shpre;
int shpost;
bool mhighbit = udiv_coefficients(ps->N, ps->d, &shpre, &m, &shpost);
//printf("[%d] %d %d %llx %d\n", i, shpre, mhighbit, m, shpost);
assert(shpre == ps->shpre);
assert(mhighbit == ps->highbit);
assert(m == ps->m);
assert(shpost == ps->shpost);
}
}
#endif
#if 0
#include <stdlib.h>
void main(int argc, char **argv)
{
if (argc == 2)
{
ullong d = atoi(argv[1]);
ullong m;
int shpre;
int shpost;
bool mhighbit = udiv_coefficients(64, d, &shpre, &m, &shpost);
printf("%d %d %llx, %d\n", shpre, mhighbit, m, shpost);
}
}
#endif