initial support for 128 bit floats (#21269)

This commit is contained in:
Walter Bright 2025-04-19 16:48:01 -07:00 committed by GitHub
parent 856d4921a0
commit 6208d4da8d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 179 additions and 18 deletions

View file

@ -52,8 +52,8 @@ nothrow:
* Params:
* cs = EA information
* reg = destination register
* szw = number of bytes to write - 4,8
* szr = number of bytes to read - 1,2,4,8
* szw = number of bytes to write - 4,8,16
* szr = number of bytes to read - 1,2,4,8,16
*/
void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr)
{
@ -69,7 +69,13 @@ void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr)
if (cs.reg != reg) // do not mov onto itself
{
assert(cs.reg & 32);
cs.Iop = INSTR.fmov(szw == 8,cs.reg,reg); // FMOV reg,cs.reg
if (szw == 16)
cs.Iop = INSTR.mov_orr_advsimd_reg(1,cs.reg,reg); // MOV Vd.16b,Vn.16b
else
{
uint ftype = INSTR.szToFtype(szw);
cs.Iop = INSTR.fmov(ftype,cs.reg,reg); // FMOV reg,cs.reg
}
}
}
else if (cs.base != NOREG)
@ -128,7 +134,7 @@ void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr)
* Params:
* cs = EA information
* reg = source register
* sz = number of bytes to store - 1,2,4,8
* sz = number of bytes to store - 1,2,4,8,16
*/
void storeToEA(ref code cs, reg_t reg, uint sz)
{
@ -142,7 +148,13 @@ void storeToEA(ref code cs, reg_t reg, uint sz)
if (cs.reg != reg) // do not mov onto itself
{
assert(cs.reg & 32);
cs.Iop = INSTR.fmov(sz == 8,reg,cs.reg); // FMOV cs.reg,reg
if (sz == 16)
cs.Iop = INSTR.mov_orr_advsimd_reg(1,reg,cs.reg); // MOV Vd.16b,Vn.16b
else
{
uint ftype = INSTR.szToFtype(sz);
cs.Iop = INSTR.fmov(ftype,cs.reg,reg); // FMOV reg,cs.reg
}
}
cs.IFL1 = FL.unde;
}
@ -501,7 +513,7 @@ void loadea(ref CodeBuilder cdb,elem* e,ref code cs,uint op,reg_t reg,targ_size_
cs.IEV1.Voffset += offset;
assert(op != LEA); // AArch64 does not have LEA
loadFromEA(cs,reg,sz == 8 ? 8 : 4,sz);
loadFromEA(cs,reg,sz >= 8 ? sz : 4,sz);
getregs(cdb, desmsk); // save any regs we destroy
cdb.gen(&cs);
@ -1984,7 +1996,7 @@ private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funca
}
regm_t retregs = tyfloating(tym) ? INSTR.FLOATREGS : cgstate.allregs;
scodelem(cgstate,cdb, e, retregs, 0, true);
if (sz <= REGSIZE)
if (sz <= REGSIZE || tym == TYldouble)
{
reg_t reg = findreg(retregs);
code cs;
@ -2072,6 +2084,9 @@ void loaddata(ref CodeBuilder cdb, elem* e, ref regm_t outretregs)
double value = e.Vfloat;
if (sz == 8)
value = e.Vdouble;
else if (sz == 16)
// cannot implicitly convert expression `(*e).EV.Vldouble` of type `longdouble_soft` to `double` [D:\a\1\s\compiler\src\vcbuild\dmd.vcxproj]
value = cast(double)e.Vldouble;
loadFloatRegConst(cdb,vreg,value,sz);
fixresult(cdb, e, forregs, outretregs);
return;
@ -2212,6 +2227,11 @@ void loaddata(ref CodeBuilder cdb, elem* e, ref regm_t outretregs)
loadea(cdb, e, cs, opmv, reg, 0, 0, 0, RM.load); // MOVSS/MOVSD reg,data
checkSetVex(cdb.last(),tym);
}
else if (sz == 16 && tym == TYldouble) // TODO complex numbers?
{
loadea(cdb,e,cs,0,reg,0,0,0,RM.load);
outretregs = mask(reg) | flags;
}
else if (sz <= REGSIZE)
{
if (tyfloating(tym))

View file

@ -1457,7 +1457,7 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs)
retregs = INSTR.FLOATREGS;
const tym = tybasic(e.Ety);
reg_t Vd = allocreg(cdb,retregs,tym); // destination integer register
reg_t Vd = allocreg(cdb,retregs,tym); // destination floating point register
switch (e.Eoper)
{
@ -1474,6 +1474,11 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs)
fixresult(cdb,e,retregs,pretregs);
break;
case OPd_ld: // call __extenddftf2
case OPld_d: // call __trunctfdf2
cdb.gen1(INSTR.udf); // TODO AArch64
break;
default:
assert(0);
}

View file

@ -1774,7 +1774,7 @@ void disassemble(uint c) @trusted
// Cryptographic AES
if (field(ins, 31, 24) == 0x4E && field(ins, 21, 17) == 0x14 && field(ins, 11, 10) == 2) // https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptoaes
{
url = "cryptoes";
url = "cryptoaes";
uint size = field(ins, 23, 22);
uint opcode = field(ins, 16, 12);
uint Rn = field(ins, 9, 5);
@ -1796,11 +1796,47 @@ void disassemble(uint c) @trusted
else
// Cryptographic three-register SHA https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptosha3
if (field(ins,31,24) == 0x5E && field(ins,21,21) == 0 && field(ins,15,15) == 0 && field(ins,11,10) == 0)
{
url = "cryptosha3";
}
else
// Cryptographic two-register SHA https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptosha2
if (field(ins,31,24) == 0x5E && field(ins,21,17) == 0x14 && field(ins,11,10) == 2)
{
url = "cryptosha2";
}
else
// Advanced SIMD scalar copy https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdone
if (field(ins,31,30) == 1 && field(ins,28,21) == 0xF0 && field(ins,15,15) == 0 && field(ins,10,10) == 1)
{
url = "asisdone";
}
else
// Advanced SIMD scalar three same FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsamefp16
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,22,21) == 2 && field(ins,11,10) == 1)
{
url = "asisdsamefp16";
}
else
// Advanced SIMD scalar two-register miscellaneous FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmiscfp16
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,22,17) == 0x3C && field(ins,15,14) == 0 && field(ins,11,10) == 2)
{
url = "asisdmiscfp16";
}
else
// Advanced SIMD scalar three same extra https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame2
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,21) == 0 && field(ins,15,15) == 1 && field(ins,10,10) == 1)
{
url = "asisdsame2";
}
else
// Advanced SIMD scalar two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2)
{
@ -1820,7 +1856,14 @@ void disassemble(uint c) @trusted
}
}
else
// Advanced SIMD scalar pairwise https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdpair
if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x18 && field(ins,11,10) == 2)
{
url = "asisdpair";
}
else
// Advanced SIMD scalar three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisddiff
// Advanced SIMD scalar three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame
// Advanced SIMD scalar shift by immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdshf
@ -1828,7 +1871,20 @@ void disassemble(uint c) @trusted
// Advanced SIMD table lookup https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdtbl
// Advanced SIMD permute https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdperm
// Advanced SIMD extract https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdext
// Advanced SIMD copy https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdins
if (field(ins,31,31) == 0 && field(ins,28,21) == 0x70 && field(ins,15,15) == 0 && field(ins,10,10) == 1)
{
url = "asimdins";
uint Q = field(ins,30,30);
uint op = field(ins,29,29);
uint imm5 = field(ins,20,16);
uint imm4 = field(ins,14,11);
uint Rn = field(ins, 9, 5);
uint Rd = field(ins, 4, 0);
}
else
// Advanced SIMD three same (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsamefp16
// Advanced SIMD two-register miscellaneous (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmiscfp16
// Advanced SIMD three-register extension https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame2
@ -1905,7 +1961,48 @@ void disassemble(uint c) @trusted
else
// Advanced SIMD three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimddiff
// Advanced SIMD three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame
if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && field(ins,21,21) == 1 && field(ins,10,10) == 1)
{
url = "asimdsame";
uint Q = field(ins,30,30);
uint U = field(ins,29,29);
uint size = field(ins,23,22);
uint Rm = field(ins,20,16);
uint opcode = field(ins,15,11);
uint Rn = field(ins, 9, 5);
uint Rd = field(ins, 4, 0);
//printf("ins:%08x Q:%d U:%d size:%d opcode:%x Rm:%d Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rm, Rn, Rd);
uint Qn = (Q + 1) * 8;
switch (opcode)
{
case 3:
if (U == 0 && size == 2)
{
if (Rm == Rn)
{
p1 = "mov"; // https://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_advsimd_reg.html
uint n = snprintf(buf.ptr, cast(uint)buf.length, "v%d.%db,v%d.%db", Rd, Qn, Rn, Qn);
p2 = buf[0 .. n];
}
else
{
p1 = "orr"; // https://www.scs.stanford.edu/~zyedidia/arm64/orr_advsimd.html
uint n = snprintf(buf.ptr, cast(uint)buf.length, "v%d.%db,v%d.%db,v%d.%db", Rd, Qn, Rn, Qn, Rm, Qn);
p2 = buf[0 .. n];
}
}
break;
default:
break;
}
}
else
// Advanced SIMD modified immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdimm
// Advanced SIMD shift by immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdshf
// Advanced SIMD vector x indexed element https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdelem
@ -2855,8 +2952,9 @@ unittest
unittest
{
int line64 = __LINE__;
string[80] cases64 = // 64 bit code gen
string[81] cases64 = // 64 bit code gen
[
"4E BE 1F C0 mov v0.16b,v30.16b",
"D4 20 00 20 brk #1",
"D6 3F 00 00 blr x0",
"1E 21 43 FF fneg s31,s31",

View file

@ -691,9 +691,21 @@ struct INSTR
static uint uaddlv_advsimd(uint Q, uint size, reg_t Vn, reg_t Vd) { return asimdall(Q, 1, size, 3, Vn & 31, Vd & 31); }
/* Advanced SIMD three different
* Advanced SIMD three same
*/
/* Advanced SIMD three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame
*/
static uint asimdsame(uint Q, uint U, uint size, reg_t Rm, uint opcode, reg_t Rn, reg_t Rd)
{
    // Bits that are the same for every instruction built here: 0xE at bit 24, plus bits 21 and 10
    uint ins = (0xE << 24) | (1 << 21) | (1 << 10);

    // Variable fields, from the most significant down.
    // Register numbers are ORed in unmasked, exactly as passed.
    ins |= Q << 30;
    ins |= U << 29;
    ins |= size << 22;
    ins |= Rm << 16;
    ins |= opcode << 11;
    ins |= Rn << 5;
    ins |= Rd;
    return ins;
}
/* ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> https://www.scs.stanford.edu/~zyedidia/arm64/orr_advsimd_reg.html
*/
static uint orr_advsimd_reg(uint Q, reg_t Vm, reg_t Vn, reg_t Vd)
{
    // Field values this encoder passes to asimdsame()
    enum uint U = 0;
    enum uint size = 2;
    enum uint opcode = 3;

    // Keep only the low 5 bits of each register number before encoding
    return asimdsame(Q, U, size, Vm & 31, opcode, Vn & 31, Vd & 31);
}
/* MOV <Vd>.<T>, <Vn>.<T> https://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_advsimd_reg.html
*/
static uint mov_orr_advsimd_reg(uint Q, reg_t Vn, reg_t Vd)
{
    // Encoded as ORR with the single source register Vn supplied for both source operands
    return orr_advsimd_reg(Q, Vn, Vn, Vd);
}
/* Advanced SIMD modified immediate
* http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdimm
*/

View file

@ -362,7 +362,7 @@ static if (0)
if (arm)
{
cfg.fpxmmregs = false; // add SIMD support later
util_set64(cfg.exe);
util_setAArch64(cfg.exe);
type_init();
cod3_setAArch64();
}
@ -596,3 +596,22 @@ void util_set64(exefmt_t exe)
_tyalignsize[TYrestrictPtr] = _tyalignsize[TYnptr];
_tyalignsize[TYfgPtr] = _tyalignsize[TYnptr];
}
/* Set up the type size and alignment tables for an AArch64 target.
 * First applies util_set64(exe); then, when exe has EX_windos set, overrides
 * the size and alignment of the long double family to 16.
 * NOTE(review): TYcldouble receives the same size (16) as TYldouble and
 * TYildouble - confirm that is intended for the complex type.
 * Params:
 *      exe = executable format flags
 */
@trusted
void util_setAArch64(exefmt_t exe)
{
    util_set64(exe);

    if (!(exe & EX_windos))
        return;                         // no overrides for other formats

    // size and alignment are both 16 bytes for each long double type
    _tysize[TYldouble]       = 16;
    _tyalignsize[TYldouble]  = 16;

    _tysize[TYildouble]      = 16;
    _tyalignsize[TYildouble] = 16;

    _tysize[TYcldouble]      = 16;
    _tyalignsize[TYcldouble] = 16;
}

View file

@ -1637,7 +1637,8 @@ static if (0)
}
tym = tybasic(tym);
uint size = _tysize[tym];
if (cgstate.AArch64)
bool AArch64 = cgstate.AArch64;
if (AArch64)
outretregs &= cgstate.allregs | INSTR.FLOATREGS;
else
outretregs &= mES | cgstate.allregs | XMMREGS | INSTR.FLOATREGS;
@ -1688,7 +1689,8 @@ L3:
}
}
if (size <= REGSIZE || retregs & XMMREGS)
// TODO AArch64 needs work on floating point and complex floats
if (size <= REGSIZE || (AArch64 ? retregs & INSTR.FLOATREGS : retregs & XMMREGS))
{
if (r & ~mBP)
r &= ~mBP;

View file

@ -1369,7 +1369,7 @@ static if (NTEXCEPTIONS)
* Allocate registers for function return values.
*
* Params:
* cgstate = code generator state
* cg = code generator state
* ty = return type
* t = return type extended info
* tyf = function type
@ -1381,11 +1381,11 @@ static if (NTEXCEPTIONS)
* 0 if function returns on the stack or returns void.
*/
@trusted
regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
regm_t allocretregs(ref CGstate cg, const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
{
//printf("allocretregs() ty: %s\n", tym_str(ty));
reg1 = reg2 = NOREG;
auto AArch64 = cgstate.AArch64;
auto AArch64 = cg.AArch64;
if (!(config.exe & EX_posix))
return regmask(ty, tyf); // for non-Posix ABI
@ -1540,6 +1540,11 @@ regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t ty
assert(I64 || tyfloating(tym));
goto case 4;
case 16:
if (AArch64 && tym == TYldouble)
return rralloc.fpt();
goto default;
default:
assert(!AArch64);
if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble)
@ -1736,7 +1741,7 @@ void doswitch(ref CGstate cg, ref CodeBuilder cdb, block* b)
elem* e = b.Belem;
elem_debug(e);
docommas(cdb,e);
cgstate.stackclean++;
cg.stackclean++;
tym_t tys = tybasic(e.Ety);
int sz = _tysize[tys];
bool dword = (sz == 2 * REGSIZE);