diff --git a/compiler/src/dmd/backend/arm/cod4.d b/compiler/src/dmd/backend/arm/cod4.d index 8262985593..44771cb4db 100644 --- a/compiler/src/dmd/backend/arm/cod4.d +++ b/compiler/src/dmd/backend/arm/cod4.d @@ -655,12 +655,6 @@ void cddivass(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs) @trusted void cdshass(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs) { - if (cg.AArch64) - { - import dmd.backend.arm.cod4 : cdshass; - return cdshass(cg, cdb, e, pretregs); - } - //printf("cdshass(e=%p, pretregs = %s)\n",e,regm_str(pretregs)); elem* e1 = e.E1; elem* e2 = e.E2; @@ -1227,15 +1221,13 @@ ret: } // longcmp -// cdcnvt /***************************** - * Do conversions. + * Do floating point conversions. * Depends on OPd_s32 and CLIB.dbllng being in sequence. OPvp_fp OPcvp_fp OPd_s32 - OPb_8 OPs32_d OPd_s16 OPs16_d @@ -1254,212 +1246,67 @@ void cdcnvt(ref CGstate cg, ref CodeBuilder cdb,elem* e, ref regm_t pretregs) //printf("cdcnvt: %p pretregs = %s\n", e, regm_str(pretregs)); //elem_print(e); - assert(!e); // these are floating point conversions, do them later - static immutable ubyte[2][16] clib = - [ - [ OPd_s32, CLIB.dbllng ], - [ OPs32_d, CLIB.lngdbl ], - [ OPd_s16, CLIB.dblint ], - [ OPs16_d, CLIB.intdbl ], - [ OPd_u16, CLIB.dbluns ], - [ OPu16_d, CLIB.unsdbl ], - [ OPd_u32, CLIB.dblulng ], - [ OPu32_d, CLIB.ulngdbl ], - [ OPd_s64, CLIB.dblllng ], - [ OPs64_d, CLIB.llngdbl ], - [ OPd_u64, CLIB.dblullng ], - [ OPu64_d, CLIB.ullngdbl ], - [ OPd_f, CLIB.dblflt ], - [ OPf_d, CLIB.fltdbl ], - [ OPvp_fp, CLIB.vptrfptr ], - [ OPcvp_fp, CLIB.cvptrfptr] - ]; - if (!pretregs) { codelem(cgstate,cdb,e.E1,pretregs,false); return; } - regm_t retregs; - if (config.inline8087) + uint sf; + uint ftype; + switch (e.Eoper) { - switch (e.Eoper) - { - case OPld_d: - case OPd_ld: + case OPd_s16: // fcvtzs w0,d31 // sxth w0,w0 + case OPd_s32: ftype = 1; sf = 0; goto L2; // fcvtzs w0,d31 + case OPd_s64: ftype = 1; sf = 1; goto L2; // fcvtzs d31,d31 // 
fmov x0,d31 + case OPd_u16: // fcvtzu w0,d31 // and w0,w0,#0xFFFF + case OPd_u32: // fcvtzu w0,d31 + case OPd_u64: // fcvtzu d31,d31 // fmov x0,d31 + L2: + regm_t retregs1 = ALLREGS; //INSTR.FLOATREGS; +retregs1 = mCX; // hack because no floating support in rest of code +// codelem(cgstate,cdb,e.E1,retregs1,false); + const reg_t V1 = findreg(retregs1); // source floating point register + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) + retregs = ALLREGS & cgstate.allregs; + const tym = tybasic(e.Ety); + reg_t Rd = allocreg(cdb,retregs,tym); // destination integer register + + switch (e.Eoper) { - if (tycomplex(e.E1.Ety)) - { - Lcomplex: - regm_t retregsx = mST01 | (pretregs & mPSW); - codelem(cgstate,cdb,e.E1, retregsx, false); - fixresult_complex87(cdb, e, retregsx, pretregs); - return; - } - regm_t retregsx = mST0 | (pretregs & mPSW); - codelem(cgstate,cdb,e.E1, retregsx, false); - fixresult87(cdb, e, retregsx, pretregs); - return; + case OPd_s16: + cdb.gen1(INSTR.fcvtzs(0,ftype,V1 & 31,Rd)); // fcvtzs Rd,V1 + cdb.gen1(INSTR.sxth_sbfm(0,Rd,Rd)); // sxth Rd,Rd + break; + case OPd_s32: + cdb.gen1(INSTR.fcvtzs(0,1,V1 & 31,Rd)); // fcvtzs Rd,V1 + break; + case OPd_s64: + cdb.gen1(INSTR.fcvtzs(1,1,V1,V1)); // fcvtzs V1,V1 + cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,V1 & 31,Rd)); // fmov Rd,V1 + break; + case OPd_u16: + cdb.gen1(INSTR.fcvtzu(0,ftype,V1 & 31,Rd)); // fcvtzu Rd,V1 + cdb.gen1(INSTR.sxth_sbfm(0,Rd,Rd)); // and Rd,Rd,#0xFFFF + break; + case OPd_u32: + cdb.gen1(INSTR.fcvtzu(0,1,V1 & 31,Rd)); // fcvtzu Rd,V1 + break; + case OPd_u64: + cdb.gen1(INSTR.fcvtzu(1,1,V1,V1)); // fcvtzu V1,V1 + cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,V1 & 31,Rd)); // fmov Rd,V1 + break; + default: + assert(0); } - case OPf_d: - case OPd_f: - if (tycomplex(e.E1.Ety)) - goto Lcomplex; - if (config.fpxmmregs && pretregs & XMMREGS) - { - xmmcnvt(cdb, e, pretregs); - return; - } - - /* if won't do us much good to transfer back and */ - /* forth between 8088 registers and 8087 
registers */ - if (OTcall(e.E1.Eoper) && !(pretregs & cgstate.allregs)) - { - retregs = regmask(e.E1.Ety, e.E1.E1.Ety); - if (retregs & (mXMM1 | mXMM0 |mST01 | mST0)) // if return in ST0 - { - codelem(cgstate,cdb,e.E1,pretregs,false); - if (pretregs & mST0) - note87(e, 0, 0); - return; - } - else - break; - } - goto Lload87; - - case OPs64_d: - if (!I64) - goto Lload87; - goto case OPs32_d; - - case OPs32_d: - if (config.fpxmmregs && pretregs & XMMREGS) - { - xmmcnvt(cdb, e, pretregs); - return; - } - goto Lload87; - - case OPs16_d: - case OPu16_d: - Lload87: - load87(cdb,e,0,pretregs,null,-1); - return; - - case OPu32_d: - if (I64 && config.fpxmmregs && pretregs & XMMREGS) - { - xmmcnvt(cdb,e,pretregs); - return; - } - else if (!I16) - { - regm_t retregsx = ALLREGS; - codelem(cgstate,cdb,e.E1, retregsx, false); - reg_t reg = findreg(retregsx); - cdb.genfltreg(STO, reg, 0); - reg = regwithvalue(cdb,ALLREGS,0,0); - cdb.genfltreg(STO, reg, 4); - - push87(cdb); - cdb.genfltreg(0xDF,5,0); // FILD m64int - - regm_t retregsy = mST0 /*| (pretregs & mPSW)*/; - fixresult87(cdb, e, retregsy, pretregs); - return; - } - break; - - case OPd_s64: - if (!I64) - goto Lcnvt87; - goto case OPd_s32; - - case OPd_s16: - case OPd_s32: - if (config.fpxmmregs) - { - xmmcnvt(cdb,e,pretregs); - return; - } - goto Lcnvt87; - - case OPd_u16: - Lcnvt87: - cnvt87(cdb,e,pretregs); - return; - - case OPd_u32: // use subroutine, not 8087 - if (I64 && config.fpxmmregs) - { - xmmcnvt(cdb,e,pretregs); - return; - } - if (I32 || I64) - { - cdd_u32(cdb,e,pretregs); - return; - } - if (config.exe & EX_posix) - { - retregs = mST0; - } - else - { - retregs = DOUBLEREGS; - } - goto L1; - - case OPd_u64: - if (I32 || I64) - { - cdd_u64(cdb,e,pretregs); - return; - } - retregs = DOUBLEREGS; - goto L1; - - case OPu64_d: - if (pretregs & mST0) - { - regm_t retregsx = I64 ? 
mAX : mAX|mDX; - codelem(cgstate,cdb,e.E1,retregsx,false); - callclib(cdb,e,CLIB.u64_ldbl,pretregs,0); - return; - } - break; - - case OPld_u64: - { - if (I32 || I64) - { - cdd_u64(cdb,e,pretregs); - return; - } - regm_t retregsx = mST0; - codelem(cgstate,cdb,e.E1,retregsx,false); - callclib(cdb,e,CLIB.ld_u64,pretregs,0); - return; - } - - default: - break; - } - } - retregs = regmask(e.E1.Ety, TYnfunc); -L1: - codelem(cgstate,cdb,e.E1,retregs,false); - for (int i = 0; 1; i++) - { - assert(i < clib.length); - if (clib[i][0] == e.Eoper) - { - callclib(cdb,e,clib[i][1],pretregs,0); + fixresult(cdb,e,retregs,pretregs); break; - } + + default: + assert(0); } } @@ -1941,9 +1788,10 @@ void cdpopcnt(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs) const R1 = findreg(retregs1); // source register - regm_t vregs = ALLREGS; // floating point register + regm_t vregs = INSTR.FLOATREGS; // possible floating point registers reg_t Vx = allocreg(cdb, vregs, TYdouble); + Vx &= 31; // to AArch64 register number cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,R1,Vx)); // FMOV Dx,X1 cdb.gen1(INSTR.cnt_advsimd(0,0,Vx,Vx)); // CNT Vx.8b,Vx.8b cdb.gen1(INSTR.addv_advsimd(0,0,Vx,Vx)); // ADDV Bx,Vx.8b diff --git a/compiler/src/dmd/backend/arm/disasmarm.d b/compiler/src/dmd/backend/arm/disasmarm.d index 8958f3eb84..340f8d6e28 100644 --- a/compiler/src/dmd/backend/arm/disasmarm.d +++ b/compiler/src/dmd/backend/arm/disasmarm.d @@ -1799,35 +1799,25 @@ void disassemble(uint c) @trusted // Advanced SIMD scalar three same FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsamefp16 // Advanced SIMD scalar two-register miscellaneous FP16 https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmiscfp16 // Advanced SIMD scalar three same extra https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame2 - - // Advanced SIMD two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc - if (field(ins,31,31) 
== 0 && field(ins,28,24) == 0x0E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2) + // Advanced SIMD scalar two-register miscellaneous http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc + if (field(ins,31,30) == 1 && field(ins,28,24) == 0x1E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2) { url = "asisdmisc"; - uint Q = field(ins,30,30); uint U = field(ins,29,29); uint size = field(ins,23,22); uint opcode = field(ins,16,12); uint Rn = field(ins, 9, 5); uint Rd = field(ins, 4, 0); - //printf("ins:%08x Q:%d U:%d size:%d opcode:%x Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rn, Rd); - if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html - { - p1 = "cnt"; // cnt ., . - p2 = vregString(rbuf[0 .. 7], Q, Rd); - p3 = vregString(rbuf[8 .. 14], Q, Rn); - //printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr); - } - else if (U == 0 && (size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html - { - p1 = "fcvtzs"; + if (size & 2 && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html + { // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html + p1 = U == 0 ? "fcvtzs" // fcvtzs , Scalar single-precision and double-precision + : "fcvtzu"; // fcvtzu , Scalar single-precision and double-precision p2 = fregString(rbuf[0 .. 4],"sd h"[size & 1],Rd); p3 = fregString(rbuf[4 .. 
8],"sd h"[size & 1],Rn); } } else - // Advanced SIMD scalar pairwise https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdpair // Advanced SIMD scalar three different https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisddiff // Advanced SIMD scalar three same https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdsame @@ -1840,7 +1830,41 @@ void disassemble(uint c) @trusted // Advanced SIMD three same (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsamefp16 // Advanced SIMD two-register miscellaneous (FP16) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmiscfp16 // Advanced SIMD three-register extension https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdsame2 + // Advanced SIMD two-register miscellaneous https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdmisc + if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && field(ins,21,17) == 0x10 && field(ins,11,10) == 2) + { + url = "asimdmisc"; + uint Q = field(ins,30,30); + uint U = field(ins,29,29); + uint size = field(ins,23,22); + uint opcode = field(ins,16,12); + uint Rn = field(ins, 9, 5); + uint Rd = field(ins, 4, 0); + //printf("ins:%08x Q:%d U:%d size:%d opcode:%x Rn:%d Rd:%d\n", ins, Q, U, size, opcode, Rn, Rd); + + immutable string[4] sizeQ = ["2S","4S","","2D"]; + + if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html + { + p1 = "cnt"; // cnt ., . + p2 = vregString(rbuf[0 .. 7], Q, Rd); + p3 = vregString(rbuf[8 .. 14], Q, Rn); + //printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr); + } + else if ((size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html + { // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html + p1 = U == 0 ? "fcvtzs" // fcvtzs ., . Vector single-precision and double-precision + : "fcvtzu"; // fcvtzu ., . 
Vector single-precision and double-precision + + uint n = snprintf(rbuf.ptr, 7, "v%d.%s", Rd, sizeQ[(size & 1) * 2 + Q].ptr); + p2 = buf[0 .. n]; + uint m = snprintf(rbuf.ptr + 7, 7, "v%d.%s", Rn, sizeQ[(size & 1) * 2 + Q].ptr); + p3 = buf[7 .. 7 + m]; + } + } + else + // Advanced SIMD across lanes https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdall if (field(ins,31,31) == 0 && field(ins,28,24) == 0x0E && @@ -1939,9 +1963,9 @@ void disassemble(uint c) @trusted p2 = regString(sf,Rd); p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn); } - else if (rmode == 3 && opcode == 0) + else if (rmode == 3 && (opcode & ~1) == 0) { - p1 = "fcvtzs"; + p1 = opcode ? "fcvtzu" : "fcvtzs"; p2 = regString(sf,Rd); p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn); } @@ -2015,6 +2039,7 @@ void disassemble(uint c) @trusted p1 = ""; // no support half-float literals p3 = doubletostring(f); } + else // Floating-point conditional compare @@ -2773,8 +2798,9 @@ unittest unittest { int line64 = __LINE__; - string[66] cases64 = // 64 bit code gen + string[67] cases64 = // 64 bit code gen [ + "5E E1 BB FE fcvtzs d30,d31", "0E 31 BB FF addv b31,v31.8b", "2E 30 38 00 uaddlv h0,v0.8b", "0E 20 58 00 cnt v0.8b,v0.8b", diff --git a/compiler/src/dmd/backend/arm/instr.d b/compiler/src/dmd/backend/arm/instr.d index 441ca5e2eb..51dc077be8 100644 --- a/compiler/src/dmd/backend/arm/instr.d +++ b/compiler/src/dmd/backend/arm/instr.d @@ -39,6 +39,11 @@ struct INSTR { pure nothrow: + /* Even though the floating point registers are V0..31, we call them 32-63 so they fit + * into regm_t. 
Remember to and them with 31 to generate an instruction + */ + enum FLOATREGS = 0xFFFF_FFFF_0000_0000; + enum uint nop = 0xD503201F; alias reg_t = ubyte; @@ -538,8 +543,37 @@ struct INSTR * Advanced SIMD scalar three same FP16 * Advanced SIMD scalar two-register miscellaneous FP16 * Advanced SIMD scalar three same extra - * Advanced SIMD scalar two-register miscellaneous - * Advanced SIMD scalar pairwise + */ + + /* Advanced SIMD scalar two-register miscellaneous + * https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asisdmisc + */ + static uint asisdmisc(uint U, uint size, uint opcode, reg_t Rn, reg_t Rd) + { + uint ins = (1 << 30) | + (U << 29) | + (0x1E << 24) | + (size << 22) | + (0x10 << 17) | + (opcode << 12) | + (2 << 10) | + (Rn << 5) | + Rd; + return ins; + } + + /* FCVTZS , https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html + * Scalar single-precision and double-precision + */ + static uint fcvtzs_asisdmisc(uint sz, reg_t Rn, reg_t Rd) { return asisdmisc(0, 2|sz, 0x1B, Rn, Rd); } + + /* FCVTZU , https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html + * Scalar single-precision and double-precision + */ + static uint fcvtzu_asisdmisc(uint sz, reg_t Rn, reg_t Rd) { return asisdmisc(1, 2|sz, 0x1B, Rn, Rd); } + + + /* Advanced SIMD scalar pairwise * Advanced SIMD scalar three different * Advanced SIMD scalar three same * Advanced SIMD scalar shift by immediate @@ -576,6 +610,16 @@ struct INSTR */ static uint cnt_advsimd(uint Q, uint size, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 0, size, 5, Rn, Rd); } + /* FCVTZS .,. https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html + * Vector single-precision and double-precision + */ + static uint fcvtzs_asimdmisc(uint Q, uint sz, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 0, 2|sz, 0x1B, Rn, Rd); } + + /* FCVTZU .,. 
https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_advsimd_int.html + * Vector single-precision and double-precision + */ + static uint fcvtzu_asimdmisc(uint Q, uint sz, reg_t Rn, reg_t Rd) { return asimdmisc(Q, 1, 2|sz, 0x1B, Rn, Rd); } + /* Advanced SIMD across lanes * https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#asimdall */ @@ -623,7 +667,7 @@ struct INSTR * Conversion between floating-point and fixed-point */ - /* Converstion between floating-point and integer https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#float2int + /* Conversion between floating-point and integer https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#float2int */ static uint float2int(uint sf, uint S, uint ftype, uint rmode, uint opcode, reg_t Rn, reg_t Rd) { @@ -651,6 +695,14 @@ struct INSTR return float2int(sf, 0, ftype, 0, 1, Rn, Rd); } + /* FCVTZS (scalar, integer) https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_float_int.html + */ + static uint fcvtzs(uint sf, uint ftype, reg_t Rn, reg_t Rd) { return float2int(sf, 0, ftype, 3, 0, Rn, Rd); } + + /* FCVTZU (scalar, integer) https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzu_float_int.html + */ + static uint fcvtzu(uint sf, uint ftype, reg_t Rn, reg_t Rd) { return float2int(sf, 0, ftype, 3, 1, Rn, Rd); } + /* Floating-point data-processing (1 source) * https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#floatdp1 */ diff --git a/compiler/src/dmd/backend/x86/cod4.d b/compiler/src/dmd/backend/x86/cod4.d index 981d78f34c..177ce1ce76 100644 --- a/compiler/src/dmd/backend/x86/cod4.d +++ b/compiler/src/dmd/backend/x86/cod4.d @@ -3216,6 +3216,7 @@ ret: @trusted void longcmp(ref CodeBuilder cdb, elem* e, bool jcond, FL fltarg, code* targ) { + assert(!cgstate.AArch64); // <= > < >= static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ]; static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];