add floating point fadd code gen (#20822)

* add floating point add

* add floating point fadd code gen
This commit is contained in:
Walter Bright 2025-02-05 11:08:10 -08:00 committed by GitHub
parent bf454f098e
commit d1d807deab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 108 additions and 24 deletions

View file

@ -2274,7 +2274,7 @@ static if (1)
int sz = _tysize[tym];
cs.Iflags = 0;
flags = outretregs & mPSW; /* save original */
forregs = outretregs & cgstate.allregs; // XMMREGS ?
forregs = outretregs & (cgstate.allregs | INSTR.FLOATREGS); // XMMREGS ?
//if (outretregs & mSTACK)
//forregs |= DOUBLEREGS;
if (e.Eoper == OPconst)

View file

@ -59,6 +59,7 @@ import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients;
void cdorth(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
{
//printf("cdorth(e = %p, pretregs = %s)\n",e,regm_str(pretregs));
//elem_print(e);
elem* e1 = e.E1;
elem* e2 = e.E2;
@ -75,29 +76,55 @@ void cdorth(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
const ty2 = tybasic(e2.Ety);
const sz = _tysize[ty];
if (tyfloating(ty1))
{
assert(0);
}
regm_t posregs = cg.allregs;
regm_t posregs = tyfloating(ty1) ? INSTR.FLOATREGS : cg.allregs;
regm_t retregs1 = posregs;
codelem(cg, cdb, e1, retregs1, false);
regm_t retregs2 = cg.allregs & ~retregs1;
scodelem(cg, cdb, e2, retregs2, retregs1, false);
regm_t retregs = pretregs & cg.allregs;
if (retregs == 0) /* if no return regs speced */
/* (like if wanted flags only) */
retregs = ALLREGS & posregs; // give us some
reg_t Rd = allocreg(cdb, retregs, ty);
reg_t Rn = findreg(retregs1);
regm_t retregs2 = posregs & ~retregs1;
//printf("retregs1: %s retregs2: %s\n", regm_str(retregs1), regm_str(retregs2));
static if (0)
{
scodelem(cg, cdb, e2, retregs2, retregs1, false);
}
else
{
retregs2 = mask(33);
}
reg_t Rm = findreg(retregs2);
regm_t retregs = pretregs & posregs;
if (retregs == 0) /* if no return regs speced */
retregs = posregs; // give us some
reg_t Rd = allocreg(cdb, retregs, ty);
regm_t PSW = pretregs & mPSW;
if (tyfloating(ty1))
{
uint ftype = sz == 2 ? 3 :
sz == 4 ? 0 : 1;
switch (e.Eoper)
{
// FADD/FSUB (scalar), Floating-point data-processing (2 source)
// https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#floatdp2
case OPadd:
cdb.gen1(INSTR.fadd_float(ftype,Rm,Rn,Rd)); // FADD Rd,Rn,Rm
break;
case OPmin:
cdb.gen1(INSTR.fsub_float(ftype,Rm,Rn,Rd)); // FSUB Rd,Rn,Rm
break;
default:
assert(0);
}
pretregs = retregs | PSW;
fixresult(cdb,e,mask(Rd),pretregs);
return;
}
switch (e.Eoper)
{
// ADDS/SUBS (extended register)

View file

@ -737,8 +737,56 @@ struct INSTR
/* Floating-point compare
* Floating-point immediate
* Floating-point conditional compare
* Floating-point data-processing (2 source)
* Floating-point conditional select
*/
/* Floating-point data-processing (2 source)
 * https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#floatdp2
 *
 * Packs the supplied fields into one 32-bit instruction word.
 * Params:
 *      M      = value shifted to bit 31
 *      S      = value shifted to bit 29
 *      ftype  = value shifted to bit 22
 *      Vm     = register operand, must be numbered 32 or above; its low 5 bits go to bit 16
 *      opcode = value shifted to bit 12
 *      Vn     = register operand, must be numbered 32 or above; its low 5 bits go to bit 5
 *      Vd     = register operand, must be numbered 32 or above; its low 5 bits go to bit 0
 * Returns:
 *      the assembled instruction word
 */
static uint floatdp2(uint M, uint S, uint ftype, reg_t Vm, uint opcode, reg_t Vn, reg_t Vd)
{
    // Every register operand has to come from the upper bank (numbers 32 and up).
    assert(Vm >= 32);
    assert(Vn >= 32);
    assert(Vd >= 32);

    // Only the low five bits of each register number are encoded.
    const uint rm = Vm & 31;
    const uint rn = Vn & 31;
    const uint rd = Vd & 31;

    // Constant bits shared by every instruction built here.
    uint ins = (0x1E << 24) | (1 << 21) | (2 << 10);

    ins |= M << 31;
    ins |= S << 29;
    ins |= ftype << 22;
    ins |= rm << 16;
    ins |= opcode << 12;
    ins |= rn << 5;
    ins |= rd;
    return ins;
}
/* FMUL (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fmul_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 0; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fmul_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFMUL = 0;
    return floatdp2(0, 0, ftype, Vm, opFMUL, Vn, Vd);
}
/* FDIV (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fdiv_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 1; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fdiv_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFDIV = 1;
    return floatdp2(0, 0, ftype, Vm, opFDIV, Vn, Vd);
}
/* FADD (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fadd_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 2; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fadd_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFADD = 2;
    return floatdp2(0, 0, ftype, Vm, opFADD, Vn, Vd);
}
/* FSUB (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fsub_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 3; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fsub_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFSUB = 3;
    return floatdp2(0, 0, ftype, Vm, opFSUB, Vn, Vd);
}
/* FMAX (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fmax_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 4; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fmax_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFMAX = 4;
    return floatdp2(0, 0, ftype, Vm, opFMAX, Vn, Vd);
}
/* FMIN (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fmin_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 5; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fmin_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFMIN = 5;
    return floatdp2(0, 0, ftype, Vm, opFMIN, Vn, Vd);
}
/* FMAXNM (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fmaxnm_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 6; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fmaxnm_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFMAXNM = 6;
    return floatdp2(0, 0, ftype, Vm, opFMAXNM, Vn, Vd);
}
/* FMINNM (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fminnm_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 7; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fminnm_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFMINNM = 7;
    return floatdp2(0, 0, ftype, Vm, opFMINNM, Vn, Vd);
}
/* FNMUL (scalar)
 * https://www.scs.stanford.edu/~zyedidia/arm64/fnmul_float.html
 *
 * Encodes via floatdp2 with M = 0, S = 0 and opcode 8; ftype and the three
 * register operands are forwarded unchanged.
 */
static uint fnmul_float(uint ftype, reg_t Vm, reg_t Vn, reg_t Vd)
{
    enum uint opFNMUL = 8;
    return floatdp2(0, 0, ftype, Vm, opFNMUL, Vn, Vd);
}
/* Floating-point conditional select
* Floating-point data-processing (3 source)
*/

View file

@ -153,7 +153,10 @@ const(char)* tym_str(tym_t ty)
const tyb = tybasic(ty);
if (tyb >= TYMAX)
{
printf("TY %x\n",cast(int)ty);
if (tyb == TYMAX)
printf("TY TYMAX\n");
else
printf("TY %x\n",cast(int)ty);
assert(0);
}
strcat(p, "TY");

View file

@ -40,6 +40,7 @@ import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.arm.disasmarm;
import dmd.backend.arm.instr;
import dmd.backend.x86.code_x86;
import dmd.backend.x86.disasm86;
@ -1620,7 +1621,7 @@ static if (0)
}
tym = tybasic(tym);
uint size = _tysize[tym];
outretregs &= mES | cgstate.allregs | XMMREGS;
outretregs &= mES | cgstate.allregs | XMMREGS | INSTR.FLOATREGS;
regm_t retregs = outretregs;
regm_t[] lastRetregs = cgstate.lastRetregs[];
@ -1630,7 +1631,7 @@ static if (0)
if ((retregs & cgstate.regcon.mvar) == retregs) // if exactly in reg vars
{
reg_t outreg;
if (size <= REGSIZE || (retregs & XMMREGS))
if (size <= REGSIZE || (retregs & XMMREGS) || (retregs & INSTR.FLOATREGS))
{
outreg = findreg(retregs);
assert(retregs == mask(outreg)); /* no more bits are set */
@ -3002,7 +3003,7 @@ const(char)* regm_str(regm_t rm)
{
char[4] buf = void;
char c = j < 32 ? 'r' : 'f';
sprintf(buf.ptr, "c%u", c, j);
sprintf(buf.ptr, "%c%u", c, j);
strcat(p, buf.ptr);
}
}

View file

@ -1142,7 +1142,8 @@ static if (NTEXCEPTIONS)
case BC.retexp:
reg_t reg1, reg2;
retregs = allocretregs(cgstate, e.Ety, e.ET, funcsym_p.ty(), reg1, reg2);
//printf("allocretregs returns %s\n", regm_str(mask(reg1) | mask(reg2)));
//printf("reg1: %d, reg2: %d\n", reg1, reg2);
//printf("allocretregs returns %llx %s\n", retregs, regm_str(retregs));
reg_t lreg = NOREG;
reg_t mreg = NOREG;
@ -1525,6 +1526,8 @@ regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t ty
assert(tyfb == TYjfunc && I32);
return ST01;
}
else if (AArch64 && tyfloating(tym))
return rralloc.fpt();
else if (tysimd(tym))
{
return rralloc.xmm();
@ -1559,6 +1562,8 @@ regm_t allocretregs(ref CGstate cgstate, const tym_t ty, type* t, const tym_t ty
reg1 = allocreg(ty1);
reg2 = allocreg(ty2);
//printf("reg1: %d reg2: %d NOREG: %d\n", reg1, reg2, NOREG);
//printf("reg1: %llx reg2: %llx ~NOREG: %llx\n", mask(reg1), mask(reg2), ~mask(NOREG));
return (mask(reg1) | mask(reg2)) & ~mask(NOREG);
}