diff --git a/compiler/src/dmd/backend/arm/cod1.d b/compiler/src/dmd/backend/arm/cod1.d index 585e12d03b..1643d460da 100644 --- a/compiler/src/dmd/backend/arm/cod1.d +++ b/compiler/src/dmd/backend/arm/cod1.d @@ -52,8 +52,8 @@ nothrow: * Params: * cs = EA information * reg = destination register - * szw = number of bytes to write - 4,8 - * szr = number of bytes to read - 1,2,4,8 + * szw = number of bytes to write - 4,8,16 + * szr = number of bytes to read - 1,2,4,8,16 */ void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr) { @@ -69,7 +69,13 @@ void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr) if (cs.reg != reg) // do not mov onto itself { assert(cs.reg & 32); - cs.Iop = INSTR.fmov(szw == 8,cs.reg,reg); // FMOV reg,cs.reg + if (szw == 16) + cs.Iop = INSTR.mov_orr_advsimd_reg(1,cs.reg,reg); // MOV Vd.16b,Vn.16b + else + { + uint ftype = INSTR.szToFtype(szw); + cs.Iop = INSTR.fmov(ftype,cs.reg,reg); // FMOV reg,cs.reg + } } } else if (cs.base != NOREG) @@ -128,7 +134,7 @@ void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr) * Params: * cs = EA information * reg = source register - * sz = number of bytes to store - 1,2,4,8 + * sz = number of bytes to store - 1,2,4,8,16 */ void storeToEA(ref code cs, reg_t reg, uint sz) { @@ -142,7 +148,13 @@ void storeToEA(ref code cs, reg_t reg, uint sz) if (cs.reg != reg) // do not mov onto itself { assert(cs.reg & 32); - cs.Iop = INSTR.fmov(sz == 8,reg,cs.reg); // FMOV cs.reg,reg + if (sz == 16) + cs.Iop = INSTR.mov_orr_advsimd_reg(1,reg,cs.reg); // MOV Vd.16b,Vn.16b + else + { + uint ftype = INSTR.szToFtype(sz); + cs.Iop = INSTR.fmov(ftype,reg,cs.reg); // FMOV cs.reg,reg (store: reg is the source, cs.reg the destination) + } } cs.IFL1 = FL.unde; } @@ -501,7 +513,7 @@ void loadea(ref CodeBuilder cdb,elem* e,ref code cs,uint op,reg_t reg,targ_size_ cs.IEV1.Voffset += offset; assert(op != LEA); // AArch64 does not have LEA - loadFromEA(cs,reg,sz == 8 ? 8 : 4,sz); + loadFromEA(cs,reg,sz >= 8 ? 
sz : 4,sz); getregs(cdb, desmsk); // save any regs we destroy cdb.gen(&cs); @@ -1984,7 +1996,7 @@ private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funca } regm_t retregs = tyfloating(tym) ? INSTR.FLOATREGS : cgstate.allregs; scodelem(cgstate,cdb, e, retregs, 0, true); - if (sz <= REGSIZE) + if (sz <= REGSIZE || tym == TYldouble) { reg_t reg = findreg(retregs); code cs; @@ -2072,6 +2084,9 @@ void loaddata(ref CodeBuilder cdb, elem* e, ref regm_t outretregs) double value = e.Vfloat; if (sz == 8) value = e.Vdouble; + else if (sz == 16) + // explicit cast required: in the vcbuild (dmd.vcxproj) build `e.Vldouble` is `longdouble_soft`, which does not implicitly convert to `double`. NOTE(review): this narrows the 16-byte constant to double before loadFloatRegConst - confirm that is intended + value = cast(double)e.Vldouble; loadFloatRegConst(cdb,vreg,value,sz); fixresult(cdb, e, forregs, outretregs); return; @@ -2212,6 +2227,11 @@ void loaddata(ref CodeBuilder cdb, elem* e, ref regm_t outretregs) loadea(cdb, e, cs, opmv, reg, 0, 0, 0, RM.load); // MOVSS/MOVSD reg,data checkSetVex(cdb.last(),tym); } + else if (sz == 16 && tym == TYldouble) // TODO complex numbers? 
+ { + loadea(cdb,e,cs,0,reg,0,0,0,RM.load); + outretregs = mask(reg) | flags; + } else if (sz <= REGSIZE) { if (tyfloating(tym)) diff --git a/compiler/src/dmd/backend/arm/cod4.d b/compiler/src/dmd/backend/arm/cod4.d index ee76051057..6b6d14051e 100644 --- a/compiler/src/dmd/backend/arm/cod4.d +++ b/compiler/src/dmd/backend/arm/cod4.d @@ -1457,7 +1457,7 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs) retregs = INSTR.FLOATREGS; const tym = tybasic(e.Ety); - reg_t Vd = allocreg(cdb,retregs,tym); // destination integer register + reg_t Vd = allocreg(cdb,retregs,tym); // destination floating point register switch (e.Eoper) { @@ -1474,6 +1474,11 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs) fixresult(cdb,e,retregs,pretregs); break; + case OPd_ld: // call __extenddftf2 + case OPld_d: // call __trunctfdf2 + cdb.gen1(INSTR.udf); // TODO AArch64 + break; + default: assert(0); } diff --git a/compiler/src/dmd/backend/arm/disasmarm.d b/compiler/src/dmd/backend/arm/disasmarm.d index 12f5bdb91b..bbb7986f4e 100644 --- a/compiler/src/dmd/backend/arm/disasmarm.d +++ b/compiler/src/dmd/backend/arm/disasmarm.d @@ -1774,7 +1774,7 @@ void disassemble(uint c) @trusted // Cryptographic AES if (field(ins, 31, 24) == 0x4E && field(ins, 21, 17) == 0x14 && field(ins, 11, 10) == 2) // https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptoaes { - url = "cryptoes"; + url = "cryptoaes"; uint size = field(ins, 23, 22); uint opcode = field(ins, 16, 12); uint Rn = field(ins, 9, 5); @@ -1796,11 +1796,47 @@ void disassemble(uint c) @trusted else // Cryptographic three-register SHA https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptosha3 + if (field(ins,31,24) == 0x5E && field(ins,21,21) == 0 && field(ins,15,15) == 0 && field(ins,11,10) == 0) + { + url = "cryptosha3"; + } + else + // Cryptographic two-register SHA https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#cryptosha2 + if 
(field(ins,31,24) == 0x5E && field(ins,21,17) == 0x14 && field(ins,11,10) == 2) + { + url = "cryptosha2"; + } + else + // Advanced SIMD scalar copy https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdone + if (field(ins,31,30) == 1 && field(ins,28,21) == 0xF0 && field(ins,15,15) == 0 && field(ins,10,10) == 1) + { + url = "asisdone"; + } + else + // Advanced SIMD scalar three same FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsamefp16 + if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,22,21) == 2 && field(ins,11,10) == 1) + { + url = "asisdsamefp16"; + } + else + // Advanced SIMD scalar two-register miscellaneous FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmiscfp16 + if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,22,17) == 0x3C && field(ins,15,14) == 0 && field(ins,11,10) == 2) + { + url = "asisdmiscfp16"; + } + else + // Advanced SIMD scalar three same extra https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame2 + if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,21) == 0 && field(ins,15,15) == 1 && field(ins,10,10) == 1) + { + url = "asisdsame2"; + } + else + // Advanced SIMD scalar two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2) { @@ -1820,7 +1856,14 @@ void disassemble(uint c) @trusted } } else + // Advanced SIMD scalar pairwise https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdpair + if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x18 && field(ins,11,10) == 2) + { + url = "asisdpair"; + } + else + // Advanced SIMD scalar three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisddiff // Advanced SIMD scalar three same 
https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame // Advanced SIMD scalar shift by immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdshf @@ -1828,7 +1871,20 @@ void disassemble(uint c) @trusted // Advanced SIMD table lookup https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdtbl // Advanced SIMD permute https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdperm // Advanced SIMD extract https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdext + // Advanced SIMD copy https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdins + if (field(ins,31,31) == 0 && field(ins,28,21) == 0x70 && field(ins,15,15) == 0 && field(ins,10,10) == 1) + { + url = "asimdins"; + uint Q = field(ins,30,30); + uint op = field(ins,29,29); + uint imm5 = field(ins,20,16); + uint imm4 = field(ins,14,11); + uint Rn = field(ins, 9, 5); + uint Rd = field(ins, 4, 0); + } + else + // Advanced SIMD three same (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsamefp16 // Advanced SIMD two-register miscellaneous (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmiscfp16 // Advanced SIMD three-register extension https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame2 @@ -1905,7 +1961,48 @@ void disassemble(uint c) @trusted else // Advanced SIMD three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimddiff + // Advanced SIMD three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame + if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && field(ins,21,21) == 1 && field(ins,10,10) == 1) + { + url = "asimdsame"; + + uint Q = field(ins,30,30); + uint U = field(ins,29,29); + uint size = field(ins,23,22); + uint Rm = field(ins,20,16); + uint opcode = field(ins,15,11); + uint Rn = field(ins, 9, 5); + uint Rd = field(ins, 4, 0); + //printf("ins:%08x Q:%d U:%d size:%d opcode:%x 
Rm:%d Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rm, Rn, Rd); + + uint Qn = (Q + 1) * 8; + switch (opcode) + { + case 3: + if (U == 0 && size == 2) + { + if (Rm == Rn) + { + p1 = "mov"; // https://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_advsimd_reg.html + uint n = snprintf(buf.ptr, cast(uint)buf.length, "v%d.%db,v%d.%db", Rd, Qn, Rn, Qn); + p2 = buf[0 .. n]; + } + else + { + p1 = "orr"; // https://www.scs.stanford.edu/~zyedidia/arm64/orr_advsimd.html + uint n = snprintf(buf.ptr, cast(uint)buf.length, "v%d.%db,v%d.%db,v%d.%db", Rd, Qn, Rn, Qn, Rm, Qn); + p2 = buf[0 .. n]; + } + } + break; + + default: + break; + } + } + else + // Advanced SIMD modified immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdimm // Advanced SIMD shift by immediate https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdshf // Advanced SIMD vector x indexed element https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdelem @@ -2855,8 +2952,9 @@ unittest unittest { int line64 = __LINE__; - string[80] cases64 = // 64 bit code gen + string[81] cases64 = // 64 bit code gen [ + "4E BE 1F C0 mov v0.16b,v30.16b", "D4 20 00 20 brk #1", "D6 3F 00 00 blr x0", "1E 21 43 FF fneg s31,s31", diff --git a/compiler/src/dmd/backend/arm/instr.d b/compiler/src/dmd/backend/arm/instr.d index 2d5581ce4b..f17d64873f 100644 --- a/compiler/src/dmd/backend/arm/instr.d +++ b/compiler/src/dmd/backend/arm/instr.d @@ -691,9 +691,21 @@ struct INSTR static uint uaddlv_advsimd(uint Q, uint size, reg_t Vn, reg_t Vd) { return asimdall(Q, 1, size, 3, Vn & 31, Vd & 31); } /* Advanced SIMD three different - * Advanced SIMD three same */ + /* Advanced SIMD three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame + */ + static uint asimdsame(uint Q, uint U, uint size, reg_t Rm, uint opcode, reg_t Rn, reg_t Rd) + { return (Q << 30) | (U << 29) | (0xE << 24) | (size << 22) | (1 << 21) | (Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd; } 
+ + /* ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> https://www.scs.stanford.edu/~zyedidia/arm64/orr_advsimd_reg.html + */ + static uint orr_advsimd_reg(uint Q, reg_t Vm, reg_t Vn, reg_t Vd) { return asimdsame(Q,0,2,Vm & 31,3,Vn & 31,Vd & 31); } + + /* MOV <Vd>.<T>, <Vn>.<T> (encoded as ORR with Vm == Vn) https://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_advsimd_reg.html + */ + static uint mov_orr_advsimd_reg(uint Q, reg_t Vn, reg_t Vd) { return orr_advsimd_reg(Q,Vn,Vn,Vd); } + /* Advanced SIMD modified immediate * http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdimm */ diff --git a/compiler/src/dmd/backend/backconfig.d b/compiler/src/dmd/backend/backconfig.d index 80451f83b3..0997ba4efc 100644 --- a/compiler/src/dmd/backend/backconfig.d +++ b/compiler/src/dmd/backend/backconfig.d @@ -362,7 +362,7 @@ static if (0) if (arm) { cfg.fpxmmregs = false; // add SIMD support later - util_set64(cfg.exe); + util_setAArch64(cfg.exe); type_init(); cod3_setAArch64(); } @@ -596,3 +596,22 @@ void util_set64(exefmt_t exe) _tyalignsize[TYrestrictPtr] = _tyalignsize[TYnptr]; _tyalignsize[TYfgPtr] = _tyalignsize[TYnptr]; } + +@trusted +void util_setAArch64(exefmt_t exe) +{ + util_set64(exe); // start from the generic 64-bit type configuration + + if (exe & EX_windos) // override the real/imaginary/complex long double sizes to 16 bytes + { + _tysize[TYldouble] = 16; + _tysize[TYildouble] = 16; + _tysize[TYcldouble] = 16; + } + if (exe & EX_windos) // same condition as the size override: give those types 16-byte alignment too + { + _tyalignsize[TYldouble] = 16; + _tyalignsize[TYildouble] = 16; + _tyalignsize[TYcldouble] = 16; + } +} diff --git a/compiler/src/dmd/backend/x86/cgcod.d b/compiler/src/dmd/backend/x86/cgcod.d index d26ed9e298..456e12cdeb 100644 --- a/compiler/src/dmd/backend/x86/cgcod.d +++ b/compiler/src/dmd/backend/x86/cgcod.d @@ -1637,7 +1637,8 @@ static if (0) } tym = tybasic(tym); uint size = _tysize[tym]; - if (cgstate.AArch64) + bool AArch64 = cgstate.AArch64; + if (AArch64) outretregs &= cgstate.allregs | INSTR.FLOATREGS; else outretregs &= mES | cgstate.allregs | XMMREGS | INSTR.FLOATREGS; @@ -1688,7 +1689,8 @@ L3: } } - if (size <= REGSIZE || retregs & XMMREGS) + // TODO AArch64 needs work on floating point and 
complex floats + if (size <= REGSIZE || (AArch64 ? retregs & INSTR.FLOATREGS : retregs & XMMREGS)) { if (r & ~mBP) r &= ~mBP; diff --git a/compiler/src/dmd/backend/x86/cod3.d b/compiler/src/dmd/backend/x86/cod3.d index a3360f5e69..b13a69ad80 100644 --- a/compiler/src/dmd/backend/x86/cod3.d +++ b/compiler/src/dmd/backend/x86/cod3.d @@ -1369,7 +1369,7 @@ static if (NTEXCEPTIONS) * Allocate registers for function return values. * * Params: - * cgstate = code generator state + * cg = code generator state * ty = return type * t = return type extended info * tyf = function type @@ -1381,11 +1381,11 @@ static if (NTEXCEPTIONS) * 0 if function returns on the stack or returns void. */ @trusted -regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2) +regm_t allocretregs(ref CGstate cg, const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2) { //printf("allocretregs() ty: %s\n", tym_str(ty)); reg1 = reg2 = NOREG; - auto AArch64 = cgstate.AArch64; + auto AArch64 = cg.AArch64; if (!(config.exe & EX_posix)) return regmask(ty, tyf); // for non-Posix ABI @@ -1540,6 +1540,11 @@ regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t ty assert(I64 || tyfloating(tym)); goto case 4; + case 16: + if (AArch64 && tym == TYldouble) + return rralloc.fpt(); + goto default; + default: assert(!AArch64); if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble) @@ -1736,7 +1741,7 @@ void doswitch(ref CGstate cg, ref CodeBuilder cdb, block* b) elem* e = b.Belem; elem_debug(e); docommas(cdb,e); - cgstate.stackclean++; + cg.stackclean++; tym_t tys = tybasic(e.Ety); int sz = _tysize[tys]; bool dword = (sz == 2 * REGSIZE);