[druntime] core.int128: Add 64-bit mul() overload and optimize via inline asm on x86_64

This commit is contained in:
Martin Kinkelin 2025-03-25 03:15:39 +01:00 committed by Nicholas Wilson
parent 6c160b78d5
commit b390c7ec13
2 changed files with 69 additions and 6 deletions

View file

@ -0,0 +1,14 @@
`core.int128`: Add `mul` and `udivmod` overloads for 64-bit operands
On x86_64, each of these maps to a single instruction and has accordingly been implemented via inline assembly.
---
import core.int128;
ulong a, b;
Cent product128 = mul(a, b);
ulong divisor64 = …;
ulong modulus64;
ulong quotient64 = udivmod(product128, divisor64, modulus64);
---

View file

@ -36,6 +36,12 @@ else
else private enum Cent_alignment = (size_t.sizeof * 2);
}
// Define GNU_OR_LDC_X86_64 when compiling for x86_64 with a compiler that sets
// the GNU or LDC version identifier. It selects the GCC-style (string template
// plus constraints) inline asm code path in mul(ulong, ulong), and is declared
// here, ahead of that first use.
version (X86_64)
{
version (GNU) version = GNU_OR_LDC_X86_64;
version (LDC) version = GNU_OR_LDC_X86_64;
}
/**
* 128 bit integer type.
* See_also: $(REF Int128, std,int128).
@ -453,6 +459,55 @@ Cent mul(Cent c1, Cent c2)
return ret;
}
/****************************
 * Full-width unsigned multiplication: 64-bit u1 times 64-bit u2, yielding the
 * complete 128-bit product.
 *
 * At run time on x86_64 the product is obtained with inline assembly
 * (GCC-style for GNU_OR_LDC_X86_64, DMD-style for D_InlineAsm_X86_64).
 * During CTFE, and on every other target, the operands are widened to Cent
 * and passed to the 128-bit mul overload.
 * Params:
 *      u1 = operand 1
 *      u2 = operand 2
 * Returns:
 *      u1 * u2 in 128-bit precision
 */
pure
Cent mul(ulong u1, ulong u2)
{
    // The asm code paths are only taken at run time.
    if (__ctfe)
        return mul(Cent(lo: u1), Cent(lo: u2));

    version (GNU_OR_LDC_X86_64)
    {
        Cent product = void;
        asm pure @trusted nothrow @nogc
        {
            // u1 is placed in RAX ("a"), u2 in any general register ("r");
            // the low half of the result is read from RAX, the high half from RDX.
            "mulq %3"
            : "=a"(product.lo), "=d"(product.hi)
            : "a"(u1), "r"(u2)
            : "cc";
        }
        return product;
    }
    else version (D_InlineAsm_X86_64)
    {
        U low64 = void, high64 = void;
        asm pure @trusted nothrow @nogc
        {
            mov RAX, u1;
            mul u2;
            mov low64, RAX;
            mov high64, RDX;
        }
        return Cent(lo: low64, hi: high64);
    }
    else
    {
        // No inline asm available: use the generic 128-bit multiply.
        return mul(Cent(lo: u1), Cent(lo: u2));
    }
}
// Exercises mul(ulong, ulong) on both of its code paths: the run-time path
// (inline asm where available) and the compile-time path taken under CTFE.
unittest
{
    // Run-time evaluation.
    assert(mul(3, 42) == Cent(lo: 126));
    // 2^60 * 2^10 == 2^70 == 2^6 * 2^64, i.e. only the high half is set.
    assert(mul(1L << 60, 1 << 10) == Cent(hi: 1 << 6));
    // Boundary operands: zero, and the largest possible product
    // (2^64 - 1)^2 == (2^64 - 2) * 2^64 + 1.
    assert(mul(0, ulong.max) == Cent(lo: 0));
    assert(mul(ulong.max, ulong.max) == Cent(lo: 1, hi: ulong.max - 1));

    // Compile-time evaluation must agree with the run-time results.
    static assert(mul(3, 42) == Cent(lo: 126));
    static assert(mul(1L << 60, 1 << 10) == Cent(hi: 1 << 6));
    static assert(mul(0, ulong.max) == Cent(lo: 0));
    static assert(mul(ulong.max, ulong.max) == Cent(lo: 1, hi: ulong.max - 1));
}
/****************************
* Unsigned divide c1 / c2.
@ -562,12 +617,6 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus)
return quotient;
}
// NOTE(review): per the hunk header (-562,12 +617,6) this appears to be the old
// location of the GNU_OR_LDC_X86_64 declaration, removed by this change in
// favour of the identical copy placed ahead of mul(ulong, ulong) - confirm
// against the raw diff.
version (X86_64)
{
version (GNU) version = GNU_OR_LDC_X86_64;
version (LDC) version = GNU_OR_LDC_X86_64;
}
/****************************
* Unsigned divide 128-bit c1 / 64-bit c2. The result must fit in 64 bits.
* The remainder after division is stored to modulus.