From b390c7ec13b76a0ce2310c0640e0247ed427916a Mon Sep 17 00:00:00 2001 From: Martin Kinkelin Date: Tue, 25 Mar 2025 03:15:39 +0100 Subject: [PATCH] [druntime] core.int128: Add 64-bit mul() overload and optimize via inline asm on x86_64 --- changelog/druntime.int128.dd | 14 +++++++++ druntime/src/core/int128.d | 61 ++++++++++++++++++++++++++++++++---- 2 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 changelog/druntime.int128.dd diff --git a/changelog/druntime.int128.dd b/changelog/druntime.int128.dd new file mode 100644 index 0000000000..bc430fad6f --- /dev/null +++ b/changelog/druntime.int128.dd @@ -0,0 +1,14 @@ +`core.int128`: Add `mul` and `udivmod` overloads for 64-bit operands + +These map to a single x86_64 instruction and have accordingly been optimized via inline assembly. + +--- +import core.int128; + +ulong a, b; +Cent product128 = mul(a, b); + +ulong divisor64 = …; +ulong modulus64; +ulong quotient64 = udivmod(product128, divisor64, modulus64); +--- diff --git a/druntime/src/core/int128.d b/druntime/src/core/int128.d index 562cfc0093..b1247891e6 100644 --- a/druntime/src/core/int128.d +++ b/druntime/src/core/int128.d @@ -36,6 +36,12 @@ else else private enum Cent_alignment = (size_t.sizeof * 2); } +version (X86_64) +{ + version (GNU) version = GNU_OR_LDC_X86_64; + version (LDC) version = GNU_OR_LDC_X86_64; +} + /** * 128 bit integer type. * See_also: $(REF Int128, std,int128). @@ -453,6 +459,55 @@ Cent mul(Cent c1, Cent c2) return ret; } +/**************************** + * Multiply 64-bit operands u1 * u2 in 128-bit precision. 
+ * Params: + * u1 = first 64-bit unsigned factor + * u2 = second 64-bit unsigned factor + * Returns: + * the full 128-bit product u1 * u2 (low 64 bits in lo, high 64 bits in hi) + */ +pure +Cent mul(ulong u1, ulong u2) +{ + if (!__ctfe) /* asm paths are runtime-only; during CTFE the fallback after this block is used */ + { + version (GNU_OR_LDC_X86_64) + { + Cent ret = void; + asm pure @trusted nothrow @nogc + { + "mulq %3" /* %3 is u2: unsigned RAX * u2, result in RDX:RAX */ + : "=a"(ret.lo), "=d"(ret.hi) + : "a"(u1), "r"(u2) + : "cc"; + } + return ret; + } + else version (D_InlineAsm_X86_64) + { + U lo = void; + U hi = void; + asm pure @trusted nothrow @nogc + { + mov RAX, u1; + mul u2; /* RDX:RAX = RAX * u2 */ + mov lo, RAX; + mov hi, RDX; + } + return Cent(lo: lo, hi: hi); + } + } + + return mul(Cent(lo: u1), Cent(lo: u2)); /* fallback (CTFE, or no asm path selected): use the Cent x Cent overload */ +} + +unittest +{ + assert(mul(3, 42) == Cent(lo: 126)); + assert(mul(1L << 60, 1 << 10) == Cent(hi: 1 << 6)); /* 2^60 * 2^10 = 2^70, i.e. bit 6 of hi */ +} + /**************************** * Unsigned divide c1 / c2. @@ -562,12 +617,6 @@ Cent udivmod(Cent c1, Cent c2, out Cent modulus) return quotient; } -version (X86_64) -{ - version (GNU) version = GNU_OR_LDC_X86_64; - version (LDC) version = GNU_OR_LDC_X86_64; -} - /**************************** * Unsigned divide 128-bit c1 / 64-bit c2. The result must fit in 64 bits. * The remainder after division is stored to modulus.