add Arm conversions of double to integral (#20809)

Walter Bright 2025-02-01 18:31:40 -08:00 committed by GitHub
parent be8668e938
commit f9f4048aed
GPG key ID: B5690EEEBB952194
4 changed files with 154 additions and 227 deletions
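
This commit teaches the AArch64 backend to lower double-to-integral conversions directly to fcvtzs/fcvtzu sequences. As a rough, hypothetical sketch (not part of the commit), these are the D-level casts involved; the comments repeat the instruction sequences described in the cdcnvt() case comments in the first diff below, with illustrative register numbers:

void sketch(double d)
{
    short  s  = cast(short)d;   // OPd_s16: fcvtzs w0,d31 ; sxth w0,w0
    int    i  = cast(int)d;     // OPd_s32: fcvtzs w0,d31
    long   l  = cast(long)d;    // OPd_s64: fcvtzs d31,d31 ; fmov x0,d31
    ushort us = cast(ushort)d;  // OPd_u16: fcvtzu w0,d31 ; and w0,w0,#0xFFFF
    uint   ui = cast(uint)d;    // OPd_u32: fcvtzu w0,d31
    ulong  ul = cast(ulong)d;   // OPd_u64: fcvtzu d31,d31 ; fmov x0,d31
}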

View file

@ -655,12 +655,6 @@ void cddivass(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
@trusted
void cdshass(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
{
if (cg.AArch64)
{
import dmd.backend.arm.cod4 : cdshass;
return cdshass(cg, cdb, e, pretregs);
}
//printf("cdshass(e=%p, pretregs = %s)\n",e,regm_str(pretregs));
elem* e1 = e.E1;
elem* e2 = e.E2;
@ -1227,15 +1221,13 @@ ret:
}
// longcmp
// cdcnvt
/*****************************
* Do conversions.
* Do floating point conversions.
* Depends on OPd_s32 and CLIB.dbllng being in sequence.
OPvp_fp
OPcvp_fp
OPd_s32
OPb_8
OPs32_d
OPd_s16
OPs16_d
@ -1254,212 +1246,67 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs)
//printf("cdcnvt: %p pretregs = %s\n", e, regm_str(pretregs));
//elem_print(e);
assert(!e); // these are floating point conversions, do them later
static immutable ubyte[2][16] clib =
[
[ OPd_s32, CLIB.dbllng ],
[ OPs32_d, CLIB.lngdbl ],
[ OPd_s16, CLIB.dblint ],
[ OPs16_d, CLIB.intdbl ],
[ OPd_u16, CLIB.dbluns ],
[ OPu16_d, CLIB.unsdbl ],
[ OPd_u32, CLIB.dblulng ],
[ OPu32_d, CLIB.ulngdbl ],
[ OPd_s64, CLIB.dblllng ],
[ OPs64_d, CLIB.llngdbl ],
[ OPd_u64, CLIB.dblullng ],
[ OPu64_d, CLIB.ullngdbl ],
[ OPd_f, CLIB.dblflt ],
[ OPf_d, CLIB.fltdbl ],
[ OPvp_fp, CLIB.vptrfptr ],
[ OPcvp_fp, CLIB.cvptrfptr]
];
if (!pretregs)
{
codelem(cgstate,cdb,e.E1,pretregs,false);
return;
}
regm_t retregs;
if (config.inline8087)
{
uint sf;
uint ftype;
switch (e.Eoper)
{
case OPld_d:
case OPd_ld:
case OPd_s16: // fcvtzs w0,d31 // sxth w0,w0
case OPd_s32: ftype = 1; sf = 0; goto L2; // fcvtzs w0,d31
case OPd_s64: ftype = 1; sf = 1; goto L2; // fcvtzs d31,d31 // fmov x0,d31
case OPd_u16: // fcvtzu w0,d31 // and w0,w0,#0xFFFF
case OPd_u32: // fcvtzu w0,d31
case OPd_u64: // fcvtzu d31,d31 // fmov x0,d31
L2:
regm_t retregs1 = ALLREGS; //INSTR.FLOATREGS;
retregs1 = mCX; // hack because no floating support in rest of code
// codelem(cgstate,cdb,e.E1,retregs1,false);
const reg_t V1 = findreg(retregs1); // source floating point register
regm_t retregs = pretregs & cg.allregs;
if (retregs == 0)
retregs = ALLREGS & cgstate.allregs;
const tym = tybasic(e.Ety);
reg_t Rd = allocreg(cdb,retregs,tym); // destination integer register
switch (e.Eoper)
{
if (tycomplex(e.E1.Ety))
{
Lcomplex:
regm_t retregsx = mST01 | (pretregs & mPSW);
codelem(cgstate,cdb,e.E1, retregsx, false);
fixresult_complex87(cdb, e, retregsx, pretregs);
return;
}
regm_t retregsx = mST0 | (pretregs & mPSW);
codelem(cgstate,cdb,e.E1, retregsx, false);
fixresult87(cdb, e, retregsx, pretregs);
return;
}
case OPf_d:
case OPd_f:
if (tycomplex(e.E1.Ety))
goto Lcomplex;
if (config.fpxmmregs && pretregs & XMMREGS)
{
xmmcnvt(cdb, e, pretregs);
return;
}
/* it won't do us much good to transfer back and */
/* forth between 8088 registers and 8087 registers */
if (OTcall(e.E1.Eoper) && !(pretregs & cgstate.allregs))
{
retregs = regmask(e.E1.Ety, e.E1.E1.Ety);
if (retregs & (mXMM1 | mXMM0 |mST01 | mST0)) // if return in ST0
{
codelem(cgstate,cdb,e.E1,pretregs,false);
if (pretregs & mST0)
note87(e, 0, 0);
return;
}
else
break;
}
goto Lload87;
case OPs64_d:
if (!I64)
goto Lload87;
goto case OPs32_d;
case OPs32_d:
if (config.fpxmmregs && pretregs & XMMREGS)
{
xmmcnvt(cdb, e, pretregs);
return;
}
goto Lload87;
case OPs16_d:
case OPu16_d:
Lload87:
load87(cdb,e,0,pretregs,null,-1);
return;
case OPu32_d:
if (I64 && config.fpxmmregs && pretregs & XMMREGS)
{
xmmcnvt(cdb,e,pretregs);
return;
}
else if (!I16)
{
regm_t retregsx = ALLREGS;
codelem(cgstate,cdb,e.E1, retregsx, false);
reg_t reg = findreg(retregsx);
cdb.genfltreg(STO, reg, 0);
reg = regwithvalue(cdb,ALLREGS,0,0);
cdb.genfltreg(STO, reg, 4);
push87(cdb);
cdb.genfltreg(0xDF,5,0); // FILD m64int
regm_t retregsy = mST0 /*| (pretregs & mPSW)*/;
fixresult87(cdb, e, retregsy, pretregs);
return;
}
break;
case OPd_s64:
if (!I64)
goto Lcnvt87;
goto case OPd_s32;
case OPd_s16:
case OPd_s32:
if (config.fpxmmregs)
{
xmmcnvt(cdb,e,pretregs);
return;
}
goto Lcnvt87;
case OPd_u16:
Lcnvt87:
cnvt87(cdb,e,pretregs);
return;
case OPd_u32: // use subroutine, not 8087
if (I64 && config.fpxmmregs)
{
xmmcnvt(cdb,e,pretregs);
return;
}
if (I32 || I64)
{
cdd_u32(cdb,e,pretregs);
return;
}
if (config.exe & EX_posix)
{
retregs = mST0;
}
else
{
retregs = DOUBLEREGS;
}
goto L1;
case OPd_u64:
if (I32 || I64)
{
cdd_u64(cdb,e,pretregs);
return;
}
retregs = DOUBLEREGS;
goto L1;
case OPu64_d:
if (pretregs & mST0)
{
regm_t retregsx = I64 ? mAX : mAX|mDX;
codelem(cgstate,cdb,e.E1,retregsx,false);
callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
return;
}
cdb.gen1(INSTR.fcvtzs(0,ftype,V1 & 31,Rd)); // fcvtzs Rd,V1
cdb.gen1(INSTR.sxth_sbfm(0,Rd,Rd)); // sxth Rd,Rd
break;
case OPd_s32:
cdb.gen1(INSTR.fcvtzs(0,1,V1 & 31,Rd)); // fcvtzs Rd,V1
break;
case OPd_s64:
cdb.gen1(INSTR.fcvtzs(1,1,V1,V1)); // fcvtzs V1,V1
cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,V1 & 31,Rd)); // fmov Rd,V1
break;
case OPd_u16:
cdb.gen1(INSTR.fcvtzu(0,ftype,V1 & 31,Rd)); // fcvtzu Rd,V1
cdb.gen1(INSTR.sxth_sbfm(0,Rd,Rd)); // and Rd,Rd,#0xFFFF
break;
case OPd_u32:
cdb.gen1(INSTR.fcvtzu(0,1,V1 & 31,Rd)); // fcvtzu Rd,V1
break;
case OPd_u64:
cdb.gen1(INSTR.fcvtzu(1,1,V1,V1)); // fcvtzu V1,V1
cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,V1 & 31,Rd)); // fmov Rd,V1
break;
default:
assert(0);
}
case OPld_u64:
{
if (I32 || I64)
{
cdd_u64(cdb,e,pretregs);
return;
}
regm_t retregsx = mST0;
codelem(cgstate,cdb,e.E1,retregsx,false);
callclib(cdb,e,CLIB.ld_u64,pretregs,0);
return;
}
fixresult(cdb,e,retregs,pretregs);
break;
default:
break;
}
}
retregs = regmask(e.E1.Ety, TYnfunc);
L1:
codelem(cgstate,cdb,e.E1,retregs,false);
for (int i = 0; 1; i++)
{
assert(i < clib.length);
if (clib[i][0] == e.Eoper)
{
callclib(cdb,e,clib[i][1],pretregs,0);
break;
}
assert(0);
}
}
@ -1941,9 +1788,10 @@ void cdpopcnt(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
const R1 = findreg(retregs1); // source register
regm_t vregs = ALLREGS; // floating point register
regm_t vregs = INSTR.FLOATREGS; // possible floating point registers
reg_t Vx = allocreg(cdb, vregs, TYdouble);
Vx &= 31; // to AArch64 register number
cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,R1,Vx)); // FMOV Dx,X1
cdb.gen1(INSTR.cnt_advsimd(0,0,Vx,Vx)); // CNT Vx.8b,Vx.8b
cdb.gen1(INSTR.addv_advsimd(0,0,Vx,Vx)); // ADDV Bx,Vx.8b
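
A note on the cdpopcnt() hunk above: AArch64 has no general-purpose-register popcount instruction, so the operand is moved into a SIMD register, CNT counts the set bits per byte, and ADDV sums the byte counts. The register now comes from INSTR.FLOATREGS, which (per the comment in the third file) numbers V0..V31 as 32..63 inside regm_t, so the allocated register must be ANDed with 31 before it is encoded. A minimal, hypothetical sketch of that numbering convention (the module path is assumed from the arm.cod4 import earlier in this diff):

import dmd.backend.arm.instr : INSTR;   // assumed location of the INSTR struct

unittest
{
    ubyte Vx = 33;                          // e.g. allocreg() handed back "register 33"
    assert(INSTR.FLOATREGS & (1UL << Vx));  // it falls inside the FLOATREGS mask
    ubyte v = Vx & 31;                      // architectural number: V1 (D1 for doubles)
    assert(v == 1);
}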

View file

@ -1799,35 +1799,25 @@ void disassemble(uint c) @trusted
// Advanced SIMD scalar three same FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsamefp16
// Advanced SIMD scalar two-register miscellaneous FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmiscfp16
// Advanced SIMD scalar three same extra https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame2
// Advanced SIMD two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc
if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2)
// Advanced SIMD scalar two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2)
{
url = "asisdmisc";
uint Q = field(ins,30,30);
uint U = field(ins,29,29);
uint size = field(ins,23,22);
uint opcode = field(ins,16,12);
uint Rn = field(ins, 9, 5);
uint Rd = field(ins, 4, 0);
//printf("ins:%08x Q:%d U:%d size:%d opcode:%x Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rn, Rd);
if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html
{
p1 = "cnt"; // cnt <Vd>.<T>, <Vn>.<T>
p2 = vregString(rbuf[0 .. 7], Q, Rd);
p3 = vregString(rbuf[8 .. 14], Q, Rn);
//printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr);
}
else if (U == 0 && (size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
{
p1 = "fcvtzs";
if (size & 2 && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
{ // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html
p1 = U == 0 ? "fcvtzs" // fcvtzs <V><d>, <V><n> Scalar single-precision and double-precision
: "fcvtzu"; // fcvtzu <V><d>, <V><n> Scalar single-precision and double-precision
p2 = fregString(rbuf[0 .. 4],"sd h"[size & 1],Rd);
p3 = fregString(rbuf[4 .. 8],"sd h"[size & 1],Rn);
}
}
else
// Advanced SIMD scalar pairwise https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdpair
// Advanced SIMD scalar three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisddiff
// Advanced SIMD scalar three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame
@ -1840,7 +1830,41 @@ void disassemble(uint c) @trusted
// Advanced SIMD three same (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsamefp16
// Advanced SIMD two-register miscellaneous (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmiscfp16
// Advanced SIMD three-register extension https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame2
// Advanced SIMD two-register miscellaneous https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmisc
if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2)
{
url = "asimdmisc";
uint Q = field(ins,30,30);
uint U = field(ins,29,29);
uint size = field(ins,23,22);
uint opcode = field(ins,16,12);
uint Rn = field(ins, 9, 5);
uint Rd = field(ins, 4, 0);
//printf("ins:%08x Q:%d U:%d size:%d opcode:%x Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rn, Rd);
immutable string[4] sizeQ = ["2S","4S","","2D"];
if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html
{
p1 = "cnt"; // cnt <Vd>.<T>, <Vn>.<T>
p2 = vregString(rbuf[0 .. 7], Q, Rd);
p3 = vregString(rbuf[8 .. 14], Q, Rn);
//printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr);
}
else if ((size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
{ // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html
p1 = U == 0 ? "fcvtzs" // fcvtzs <Vd>.<T>, <Vn>.<T> Vector single-precision and double-precision
: "fcvtzu"; // fcvtzu <Vd>.<T>, <Vn>.<T> Vector single-precision and double-precision
uint n = snprintf(rbuf.ptr, 7, "v%d.%s", Rd, sizeQ[(size & 1) * 2 + Q].ptr);
p2 = buf[0 .. n];
uint m = snprintf(rbuf.ptr + 7, 7, "v%d.%s", Rn, sizeQ[(size & 1) * 2 + Q].ptr);
p3 = buf[7 .. 7 + m];
}
}
else
// Advanced SIMD across lanes https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdall
if (field(ins,31,31) == 0 &&
field(ins,28,24) == 0x0E &&
@ -1939,9 +1963,9 @@ void disassemble(uint c) @trusted
p2 = regString(sf,Rd);
p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn);
}
else if (rmode == 3 && opcode == 0)
else if (rmode == 3 && (opcode & ~1) == 0)
{
p1 = "fcvtzs";
p1 = opcode ? "fcvtzu" : "fcvtzs";
p2 = regString(sf,Rd);
p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn);
}
@ -2015,6 +2039,7 @@ void disassemble(uint c) @trusted
p1 = ""; // no support half-float literals
p3 = doubletostring(f);
}
else
// Floating-point conditional compare
@ -2773,8 +2798,9 @@ unittest
unittest
{
int line64 = __LINE__;
string[66] cases64 = // 64 bit code gen
string[67] cases64 = // 64 bit code gen
[
"5E E1 BB FE fcvtzs d30,d31",
"0E 31 BB FF addv b31,v31.8b",
"2E 30 38 00 uaddlv h0,v0.8b",
"0E 20 58 00 cnt v0.8b,v0.8b",

View file

@ -39,6 +39,11 @@ struct INSTR
{
pure nothrow:
/* Even though the floating point registers are V0..V31, we call them 32..63 so they fit
* into regm_t. Remember to AND them with 31 when generating an instruction.
*/
enum FLOATREGS = 0xFFFF_FFFF_0000_0000;
enum uint nop = 0xD503201F;
alias reg_t = ubyte;
@ -538,8 +543,37 @@ struct INSTR
* Advanced SIMD scalar three same FP16
* Advanced SIMD scalar two-register miscellaneous FP16
* Advanced SIMD scalar three same extra
* Advanced SIMD scalar two-register miscellaneous
* Advanced SIMD scalar pairwise
*/
/* Advanced SIMD scalar two-register miscellaneous
* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc
*/
static uint asisdmisc(uint U, uint size, uint opcode, reg_t Rn, reg_t Rd)
{
uint ins = (1 << 30) |
(U << 29) |
(0x1E << 24) |
(size << 22) |
(0x10 << 17) |
(opcode << 12) |
(2 << 10) |
(Rn << 5) |
Rd;
return ins;
}
/* FCVTZS <V><d>,<V><n> https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
* Scalar single-precision and double-precision
*/
static uint fcvtzs_asisdmisc(uint sz, reg_t Rn, reg_t Rd) { return asisdmisc(0, 2|sz, 0x1B, Rn, Rd); }
/* FCVTZU <V><d>,<V><n> https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html
* Scalar single-precision and double-precision
*/
static uint fcvtzu_asisdmisc(uint sz, reg_t Rn, reg_t Rd) { return asisdmisc(1, 2|sz, 0x1B, Rn, Rd); }
/* Advanced SIMD scalar pairwise
* Advanced SIMD scalar three different
* Advanced SIMD scalar three same
* Advanced SIMD scalar shift by immediate
@ -576,6 +610,16 @@ struct INSTR
*/
static uint cnt_advsimd(uint Q, uint size, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 0, size, 5, Rn, Rd); }
/* FCVTZS <Vd>.<T>,<Vn>.<T> https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
* Vector single-precision and double-precision
*/
static uint fcvtzs_asimdmisc(uint Q, uint sz, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 0, 2|sz, 0x1B, Rn, Rd); }
/* FCVTZU <Vd>.<T>,<Vn>.<T> https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html
* Vector single-precision and double-precision
*/
static uint fcvtzu_asimdmisc(uint Q, uint sz, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 1, 2|sz, 0x1B, Rn, Rd); }
/* Advanced SIMD across lanes
* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdall
*/
@ -623,7 +667,7 @@ struct INSTR
* Conversion between floating-point and fixed-point
*/
/* Converstion between floating-point and integer https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#float2int
/* Conversion between floating-point and integer https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#float2int
*/
static uint float2int(uint sf, uint S, uint ftype, uint rmode, uint opcode, reg_t Rn, reg_t Rd)
{
@ -651,6 +695,14 @@ struct INSTR
return float2int(sf, 0, ftype, 0, 1, Rn, Rd);
}
/* FCVTZS (scalar, integer) https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_float_int.html
*/
static uint fcvtzs(uint sf, uint ftype, reg_t Rn, reg_t Rd) { return float2int(sf, 0, ftype, 3, 0, Rn, Rd); }
/* FCVTZU (scalar, integer) https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_float_int.html
*/
static uint fcvtzu(uint sf, uint ftype, reg_t Rn, reg_t Rd) { return float2int(sf, 0, ftype, 3, 1, Rn, Rd); }
/* Floating-point data-processing (1 source)
* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#floatdp1
*/
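
A quick consistency check of the new encoders — a sketch only; the first value is derived from the asisdmisc() bit layout shown above and matches the disassembler test case, while the second assumes float2int() follows the standard A64 float-to-integer encoding:

unittest
{
    // Scalar two-register miscellaneous form: fcvtzs d30,d31 ("5E E1 BB FE").
    assert(INSTR.fcvtzs_asisdmisc(1, 31, 30) == 0x5EE1BBFE);

    // Scalar, integer form used by cdcnvt for OPd_s64: fcvtzs x0,d31.
    // sf=1 (Xd destination), ftype=1 (double source), Rn=31 (V31 & 31), Rd=0.
    assert(INSTR.fcvtzs(1, 1, 31, 0) == 0x9E7803E0);
}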

View file

@ -3216,6 +3216,7 @@ ret:
@trusted
void longcmp(ref CodeBuilder cdb, elem* e, bool jcond, FL fltarg, code* targ)
{
assert(!cgstate.AArch64);
// <= > < >=
static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];
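
The last hunk only asserts that longcmp() is never reached for AArch64, where a 64-bit value fits in one register. On the 16/32-bit x86 path it guards, the jopmsw/joplsw tables implement the usual split comparison: high words compared signed (JL/JG), low words unsigned (JBE/JA/JB/JAE). A hypothetical D rendering of the idea for "a <= b" — illustrative only, not the backend's code:

bool longLE(int aHi, uint aLo, int bHi, uint bLo)
{
    if (aHi != bHi)
        return aHi < bHi;    // high words differ: signed compare decides
    return aLo <= bLo;       // high words equal: unsigned compare of low words
}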