Kill off more codegen stuff for ancient x86 cpus

This commit is contained in:
Ben Jones 2024-05-02 21:52:04 -06:00 committed by The Dlang Bot
parent afc38e9dca
commit e5d7779794
10 changed files with 86 additions and 313 deletions

View file

@ -99,8 +99,6 @@ private void getlvalue87(ref CodeBuilder cdb, ref code pcs,elem *e,regm_t keepms
e.EV.Vsym.Sflags &= ~GTregcand;
getlvalue(cdb, &pcs, e, keepmsk);
if (ADDFWAIT())
pcs.Iflags |= CFwait;
if (I32)
pcs.Iflags &= ~CFopsize;
else if (I64)
@ -228,7 +226,6 @@ void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
const i = getemptyslot(global87.save, global87.stack[7]);
cdb.genf2(0xD9,0xF6); // FDECSTP
genfwait(cdb);
ndp_fstp(cdb, i, global87.stack[7].e.Ety); // FSTP i[BP]
assert(global87.stackused == 8);
if (NDPP) printf("push87() : overflow\n");
@ -336,7 +333,6 @@ L1:
break;
}
push87(cdb);
genfwait(cdb);
ndp_fld(cdb, j, e.Ety); // FLD j[BP]
if (!(flag & 1))
{
@ -358,20 +354,15 @@ L1:
/**
 * Store every tracked x87 stack entry into a temporary save slot.
 *
 * Loops while global87.stack[0] holds an expression and global87.stackused
 * is non-zero. Each iteration picks a slot with getemptyslot(), emits the
 * store through ndp_fstp() (an FSTP to that slot), and then drops the
 * tracked entry with pop87().
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 */
@trusted
void save87(ref CodeBuilder cdb)
{
// Becomes true once at least one store has been emitted
bool any = false;
while (global87.stack[0].e && global87.stackused)
{
// Save it
const i = getemptyslot(global87.save, global87.stack[0]);
if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e, cast(int)i);
// genfwait() only emits an FWAIT when ADDFWAIT() is true
genfwait(cdb);
ndp_fstp(cdb,i,global87.stack[0].e.Ety); // FSTP i[BP]
pop87();
any = true;
}
if (any) // if any stores
genfwait(cdb); // wait for last one to finish
}
/******************************************
@ -388,7 +379,6 @@ void save87regs(ref CodeBuilder cdb, uint n)
for (uint k = 8; k > j; k--)
{
cdb.genf2(0xD9,0xF6); // FDECSTP
genfwait(cdb);
if (k <= global87.stackused)
{
const i = getemptyslot(global87.save, global87.stack[k - 1]);
@ -400,8 +390,8 @@ void save87regs(ref CodeBuilder cdb, uint n)
for (uint k = 8; k > j; k--)
{
if (k > global87.stackused)
{ cdb.genf2(0xD9,0xF7); // FINCSTP
genfwait(cdb);
{
cdb.genf2(0xD9,0xF7); // FINCSTP
}
}
global87.stackused = j;
@ -507,18 +497,6 @@ void comsub87(ref CodeBuilder cdb,elem *e, ref regm_t outretregs)
}
}
/*******************************
* Decide if we need to gen an FWAIT, and generate it if so.
*
* Appends a single FWAIT opcode to cdb when ADDFWAIT() returns true;
* otherwise nothing is generated.
*
* Params:
*      cdb = code builder to append the instruction to
*/
public void genfwait(ref CodeBuilder cdb)
{
if (ADDFWAIT())
cdb.gen1(FWAIT);
}
/***************************
* Put the 8087 flags into the CPU flags.
*/
@ -531,14 +509,7 @@ private void cg87_87topsw(ref CodeBuilder cdb)
*/
assert(!NOSAHF);
getregs(cdb,mAX);
if (config.target_cpu >= TARGET_80286)
cdb.genf2(0xDF,0xE0); // FSTSW AX
else
{
cdb.genfltreg(0xD8+5,7,0); // FSTSW floatreg[BP]
genfwait(cdb); // FWAIT
cdb.genfltreg(0x8A,4,1); // MOV AH,floatreg+1[BP]
}
cdb.genf2(0xDF,0xE0); // FSTSW AX
cdb.gen1(0x9E); // SAHF
code_orflag(cdb.last(),CFpsw);
}
@ -597,7 +568,7 @@ private void genftst(ref CodeBuilder cdb,elem *e,int pop)
pop87();
}
}
else if (config.target_cpu >= TARGET_80386)
else
{
// FUCOMP doesn't raise exceptions on QNANs, unlike FTST
push87(cdb);
@ -608,18 +579,6 @@ private void genftst(ref CodeBuilder cdb,elem *e,int pop)
pop87();
cg87_87topsw(cdb); // put 8087 flags in CPU flags
}
else
{
// Call library function which does not raise exceptions
regm_t regm = 0;
callclib(cdb,e,CLIB.ftest,&regm,0);
if (pop)
{
cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
}
}
/*************************************
@ -738,11 +697,11 @@ ubyte loadconst(elem *e, int im)
// since FLDZ loads a +0
assert(sz <= zeros.length);
zero = (memcmp(p, zeros.ptr, sz) == 0);
if (zero && config.target_cpu >= TARGET_PentiumPro)
if (zero)
return 0xEE; // FLDZ is the only one with 1 micro-op
// For some reason, these instructions take more clocks
if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
if (config.flags4 & CFG4speed)
return 0;
if (zero)
@ -847,7 +806,6 @@ void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs, ref regm_t outretre
// FSTP floatreg
pop87();
cdb.genfltreg(ESC(mf,1),3,0);
genfwait(cdb);
const reg = allocreg(cdb,outretregs,(sz == FLOATSIZE) ? TYfloat : TYdouble);
if (sz == FLOATSIZE)
{
@ -911,7 +869,6 @@ void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs, ref regm_t outretre
// FSTP floatreg
pop87();
cdb.genfltreg(ESC(mf,1),3,0);
genfwait(cdb);
// MOVD XMM?,floatreg
const reg = allocreg(cdb,outretregs,(sz == FLOATSIZE) ? TYfloat : TYdouble);
cdb.genxmmreg(xmmload(tym),reg,0,tym);
@ -1090,38 +1047,23 @@ void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
}
else
{
if (e2.Eoper == OPconst && !boolres(e2) &&
config.target_cpu < TARGET_80386)
note87(e1,0,0);
load87(cdb,e2,0,retregs,e1,-1);
makesure87(cdb,e1,0,1,0);
resregm = 0;
if (NOSAHF)
{
regm_t regm = 0;
callclib(cdb,e,CLIB.ftest0,&regm,0);
cdb.gen2(0xDF,0xE9); // FUCOMIP ST1
pop87();
cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else
{
note87(e1,0,0);
load87(cdb,e2,0,retregs,e1,-1);
makesure87(cdb,e1,0,1,0);
resregm = 0;
if (NOSAHF)
{
cdb.gen2(0xDF,0xE9); // FUCOMIP ST1
pop87();
cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else if (config.target_cpu >= TARGET_80386)
{
cdb.gen2(0xDA,0xE9); // FUCOMPP
cg87_87topsw(cdb);
pop87();
pop87();
}
else
// Call a function instead so that exceptions
// are not generated.
callclib(cdb,e,CLIB.fcompp,&resregm,0);
cdb.gen2(0xDA,0xE9); // FUCOMPP
cg87_87topsw(cdb);
pop87();
pop87();
}
}
@ -1629,10 +1571,7 @@ void load87(ref CodeBuilder cdb,elem *e,uint eoffset,ref regm_t outretregs,elem
assert(!(NOSAHF && op == 3));
elem_debug(e);
if (ADDFWAIT())
cs.Iflags = CFwait;
else
cs.Iflags = 0;
cs.Iflags = 0;
cs.Irex = 0;
OPER opr = oprev[op + 1];
tym_t ty = tybasic(e.Ety);
@ -2068,7 +2007,6 @@ void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
}
}
}
genfwait(cdb);
freenode(e.EV.E1);
fixresult87(cdb,e,mST0 | mPSW,*pretregs);
}
@ -2088,7 +2026,7 @@ void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
//printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
assert(e.Eoper == OPeq);
cs.Iflags = ADDFWAIT() ? CFwait : 0;
cs.Iflags = 0;
cs.Irex = 0;
regm_t retregs = mST01 | (*pretregs & mPSW);
codelem(cdb,e.EV.E2,&retregs,false);
@ -2133,13 +2071,11 @@ void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
cs.Irm |= modregrm(0, op2, 0);
makesure87(cdb,e.EV.E2, sz, 0, 0);
cdb.gen(&cs);
genfwait(cdb);
makesure87(cdb,e.EV.E2, 0, 1, 0);
}
else
{
loadea(cdb,e.EV.E1,&cs,op1,op2,sz,0,0);
genfwait(cdb);
}
if (fxch)
cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1)
@ -2187,7 +2123,6 @@ void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
cdb.gen(&cs);
}
}
genfwait(cdb);
freenode(e.EV.E1);
fixresult_complex87(cdb, e,mST01 | mPSW,*pretregs);
}
@ -2229,16 +2164,14 @@ private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
}
freenode(e.EV.E2);
genfwait(cdb);
genSetRoundingMode(cdb, CW.roundto0); // FLDCW roundto0
pop87();
cs.Iflags = ADDFWAIT() ? CFwait : 0;
cs.Iflags = 0;
if (e.EV.E1.Eoper == OPvar)
notreg(e.EV.E1); // cannot be put in register anymore
loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);
genfwait(cdb);
genSetRoundingMode(cdb, CW.roundtonearest); // FLDCW roundtonearest
freenode(e.EV.E1);
@ -2374,7 +2307,6 @@ public void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
NEWREG(cs.Irm,op2); // FSTx e.EV.E1
freenode(e.EV.E1);
cdb.gen(&cs);
genfwait(cdb);
fixresult87(cdb,e,mST0 | mPSW,*pretregs);
}
@ -2460,7 +2392,6 @@ private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs)
retregs = 0;
}
freenode(e.EV.E1);
genfwait(cdb);
fixresult_complex87(cdb,e,retregs,*pretregs);
}
@ -2658,7 +2589,6 @@ private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
}
L3:
freenode(e.EV.E1);
genfwait(cdb);
fixresult_complex87(cdb,e,retregs,*pretregs);
return;
@ -2889,7 +2819,6 @@ void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
pop87();
cs.IEV1.Voffset -= sz;
cdb.gen(&cs); // FSTP e.EV.E1
genfwait(cdb);
freenode(e.EV.E1);
fixresult_complex87(cdb, e, mST01, *pretregs);
return;
@ -2910,7 +2839,6 @@ void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
NEWREG(cs.Irm,reg);
pop87();
cdb.gen(&cs); // FSTP e.EV.E1
genfwait(cdb);
freenode(e.EV.E1);
fixresult87(cdb,e,mPSW | mST0,*pretregs);
}
@ -3228,11 +3156,8 @@ void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
cdb.gen1(0x50 + AX); // PUSH EAX
else
cod3_stackadj(cdb, szpush);
genfwait(cdb);
cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]
genfwait(cdb);
if (config.flags3 & CFG3pic)
{
cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW.roundto0); // MOV szoff+2[ESP], CW.roundto0
@ -3244,7 +3169,6 @@ void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
pop87();
genfwait(cdb);
cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP]
retregs = *pretregs & (ALLREGS | mBP);
@ -3252,7 +3176,6 @@ void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
retregs = ALLREGS;
reg = allocreg(cdb,retregs,tym);
genfwait(cdb); // FWAIT
cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]
if (szoff > REGSIZE)
@ -3274,7 +3197,6 @@ void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
retregs = mST0;
codelem(cdb,e.EV.E1,&retregs,false);
genfwait(cdb);
genSetRoundingMode(cdb, CW.roundto0); // FLDCW roundto0
pop87();
@ -3284,8 +3206,6 @@ void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
retregs = ALLREGS;
reg = allocreg(cdb,retregs,tym);
genfwait(cdb);
if (sz > REGSIZE)
{
cdb.genfltreg(LOD,reg,REGSIZE); // MOV reg,floatreg + REGSIZE
@ -3340,7 +3260,6 @@ void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
if (!retregs)
retregs = ALLREGS;
const reg = allocreg(cdb,retregs,tym);
genfwait(cdb); // FWAIT
if (tysize(tym) > REGSIZE)
{
cdb.genfltreg(LOD,reg,REGSIZE); // MOV reg,floatreg + REGSIZE
@ -3672,7 +3591,6 @@ void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs, ref regm_t
cdb.genfltreg(ESC(MFfloat,1),BX,4); // FSTP floatreg
pop87();
cdb.genfltreg(ESC(MFfloat,1),BX,0); // FSTP floatreg+4
genfwait(cdb);
const reg = findreg(outretregs);
getregs(cdb,reg);
cdb.genfltreg(LOD, reg, 0); // MOV ECX,floatreg
@ -3684,13 +3602,11 @@ void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs, ref regm_t
genctst(cdb,e,0); // FTST
pop87();
cdb.genfltreg(ESC(MFfloat,1),3,0); // FSTP floatreg
genfwait(cdb);
getregs(cdb,mDX|mAX);
cdb.genfltreg(LOD, DX, 0); // MOV EDX,floatreg
pop87();
cdb.genfltreg(ESC(MFfloat,1),3,0); // FSTP floatreg
genfwait(cdb);
cdb.genfltreg(LOD, AX, 0); // MOV EAX,floatreg
}
else if (tym == TYcfloat && retregs & (mAX|mDX) && outretregs & mST01)
@ -3716,13 +3632,11 @@ void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs, ref regm_t
genctst(cdb,e,0); // FTST
pop87();
cdb.genfltreg(ESC(mf,1),3,0); // FSTP floatreg
genfwait(cdb);
getregs(cdb,mXMM0|mXMM1);
cdb.genxmmreg(xop,XMM1,0,tyf);
pop87();
cdb.genfltreg(ESC(mf,1),3,0); // FSTP floatreg
genfwait(cdb);
cdb.genxmmreg(xop, XMM0, 0, tyf); // MOVD XMM0,floatreg
}
else if ((tym == TYcfloat || tym == TYcdouble) &&
@ -3815,8 +3729,6 @@ void cload87(ref CodeBuilder cdb, elem *e, ref regm_t outretregs)
//printf("cload87(e = %p, outretregs = %s)\n", e, regm_str(outretregs));
sz = _tysize[ty] / 2;
memset(&cs, 0, cs.sizeof);
if (ADDFWAIT())
cs.Iflags = CFwait;
switch (ty)
{
case TYcfloat: mf = MFfloat; break;
@ -3928,7 +3840,6 @@ void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
const sz = _tysize[tym];
uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
cdb.genfltreg(ESC(mf,1),3,0); // FSTP float/double ptr fltreg
genfwait(cdb);
cdb.genfltreg(ESC(mf,1),0,0); // FLD float/double ptr fltreg
}
fixresult87(cdb, e, retregs, *pretregs);

View file

@ -933,7 +933,6 @@ else
{
if (I16 ||
!(config.flags4 & CFG4speed) ||
config.target_cpu < TARGET_Pentium ||
farfunc ||
config.flags & CFGstack ||
xlocalsize >= 0x1000 ||
@ -2873,19 +2872,7 @@ void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool
}
/* which registers can we use to save other registers in? */
if (config.flags4 & CFG4space || // if optimize for space
config.target_cpu >= TARGET_80486) // PUSH/POP ops are 1 cycle
touse = 0; // PUSH/POP pairs are always shorter
else
{
touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
/* Don't use registers we'll have to save/restore */
touse &= ~(fregsaved & oldmfuncreg);
/* Don't use registers that have constant values in them, since
the code generated might have used the value.
*/
touse &= ~oldregimmed;
}
touse = 0; // PUSH/POP pairs are always shorter
CodeBuilder cdbs1; cdbs1.ctor();
code *cs2 = null;

View file

@ -2694,8 +2694,7 @@ private elem * eldiv(elem *e, goal_t goal)
int pow2;
if (e2.Eoper == OPconst &&
!uns &&
(pow2 = ispow2(el_tolong(e2))) != -1 &&
!(config.target_cpu < TARGET_80286 && pow2 != 1 && e.Eoper == OPdiv)
(pow2 = ispow2(el_tolong(e2))) != -1
)
{ }
else

View file

@ -617,12 +617,6 @@ Obj OmfObj_init(OutBuffer *objbuf, const(char)* filename, const(char)* csegname)
obj.csegattr = SEG_ATTR(SEG_ALIGN2, SEG_C_PUBLIC,0,USE16);
}
if (config.flags4 & CFG4speed && // if optimized for speed
config.target_cpu == TARGET_80486)
// 486 is only CPU that really benefits from alignment
obj.csegattr = I32 ? SEG_ATTR(SEG_ALIGN16, SEG_C_PUBLIC,0,USE32)
: SEG_ATTR(SEG_ALIGN16, SEG_C_PUBLIC,0,USE16);
SegData.reset(); // recycle memory
getsegment(); // element 0 is reserved

View file

@ -40,10 +40,6 @@ nothrow:
// assembler.
private bool is32bitaddr(bool x, uint Iflags) { return I64 || (x ^ ((Iflags & CFaddrsize) != 0)); }
// If we use Pentium Pro scheduler:
// returns true when config.target_cpu compares >= TARGET_PentiumPro.
@trusted
private bool PRO() { return config.target_cpu >= TARGET_PentiumPro; }
private enum FP : ubyte
{
none = 0,
@ -133,18 +129,11 @@ struct Cinfo
@trusted
private void cgsched_pentium(code **pc,regm_t scratch)
{
//printf("scratch = x%02x\n",scratch);
if (config.target_scheduler >= TARGET_80486)
if (!I64)
*pc = peephole(*pc,0);
if (I32) // forget about 16 bit code
{
if (!I64)
*pc = peephole(*pc,0);
if (I32) // forget about 16 bit code
{
if (config.target_cpu == TARGET_Pentium ||
config.target_cpu == TARGET_PentiumMMX)
*pc = simpleops(*pc,scratch);
*pc = schedule(*pc,0);
}
*pc = schedule(*pc,0);
}
}
@ -155,7 +144,6 @@ private void cgsched_pentium(code **pc,regm_t scratch)
public void cgsched_block(block* b)
{
if (config.flags4 & CFG4speed &&
config.target_cpu >= TARGET_Pentium &&
b.BC != BCasm)
{
regm_t scratch = allregs;
@ -1282,13 +1270,8 @@ private Cinfo getinfo(code *c)
Cinfo ci;
ci.c = c;
if (PRO)
{
ci.uops = uops(c);
ci.isz = cast(ubyte)calccodsize(c);
}
else
ci.pair = cast(ubyte)pair_class(c);
ci.uops = uops(c);
ci.isz = cast(ubyte)calccodsize(c);
ubyte op;
ubyte op2;
@ -2177,10 +2160,6 @@ Lconflict:
//printf("r1=%x, w1=%x, r2=%x, w2=%x\n",r1,w1,r2,w2);
delay_clocks = 0;
// Determine if AGI
if (!PRO && pair_agi(*ci1, *ci2))
delay_clocks = 1;
// Special delays for floating point
if (fpsched)
{ if (ci1.fp_op == FP.fld && ci2.fp_op == FP.fstp)
@ -2190,27 +2169,12 @@ Lconflict:
else if (ci1.fp_op == FP.fop && ci2.fp_op == FP.fop)
delay_clocks = 2;
}
else if (PRO)
else
{
// Look for partial register write stalls
if (w1 & r2 & ALLREGS && sz1 < sz2)
delay_clocks = 7;
}
else if ((w1 | r1) & (w2 | r2) & (C | S))
{
int op = c1.Iop;
int reg = c1.Irm & modregrm(0,7,0);
if (ci1.fp_op == FP.fld ||
(op == 0xD9 && (c1.Irm & 0xF8) == 0xC0)
)
{ } // FLD
else if (op == 0xD9 && (c1.Irm & 0xF8) == 0xC8)
{ } // FXCH
else if (c2.Iop == 0xD9 && (c2.Irm & 0xF8) == 0xC8)
{ } // FXCH
else
delay_clocks = 3;
}
if (i) printf("conflict %d\n\n",delay_clocks);
return 0x100 + delay_clocks;
@ -2275,18 +2239,12 @@ code **assemble(code **pc) // reassemble scheduled instructions
debug
if (debugs)
{
if (PRO)
{ immutable char[4][3] tbl = [ "0 "," 1 "," 2" ];
immutable char[4][3] tbl = [ "0 "," 1 "," 2" ];
if (ci)
printf("%s %d ",tbl[i - ((i / 3) * 3)].ptr,ci.uops);
else
printf("%s ",tbl[i - ((i / 3) * 3)].ptr);
}
if (ci)
printf("%s %d ",tbl[i - ((i / 3) * 3)].ptr,ci.uops);
else
{
printf((i & 1) ? " V " : "U ");
}
printf("%s ",tbl[i - ((i / 3) * 3)].ptr);
if (ci)
ci.c.print();
else
@ -2457,25 +2415,8 @@ int insert(Cinfo *ci)
// Move forward the delay clocks
if (clocks == 0)
j = i + 1;
else if (PRO)
j = (((i + 3) / 3) * 3) + clocks * 3;
else
{ j = ((i + 2) & ~1) + clocks * 2;
// It's possible we skipped over some AGI generating
// instructions due to movesp.
int k;
for (k = i + 1; k < j; k++)
{
if (k >= TBLMAX)
goto Lnoinsert;
if (tbl[k] && pair_agi(*tbl[k], *ci))
{
k = ((k + 2) & ~1) + 1;
}
}
j = k;
}
j = (((i + 3) / 3) * 3) + clocks * 3;
if (j >= TBLMAX) // exceed table size?
goto Lnoinsert;
@ -2488,90 +2429,61 @@ int insert(Cinfo *ci)
// Scan forward looking for a hole to put it in
for (i = imin; i < TBLMAX; i++)
{
if (tbl[i])
if (!tbl[i])
{
// In case, due to movesp, we skipped over some AGI instructions
if (!PRO && pair_agi(*tbl[i], *ci))
{
i = ((i + 2) & ~1) + 1;
if (i >= TBLMAX)
goto Lnoinsert;
}
}
else
{
if (PRO)
{ int i0 = (i / 3) * 3; // index of decode unit 0
Cinfo *ci0;
assert(((TBLMAX / 3) * 3) == TBLMAX);
switch (i - i0)
{
case 0: // i0 can handle any instruction
goto Linsert;
case 1:
ci0 = tbl[i0];
if (ci.uops > 1)
{
if (i0 >= imin && ci0.uops == 1)
goto L1;
i++;
break;
}
if (triple_test(*ci0,*ci,tbl[i0 + 2]))
goto Linsert;
break;
case 2:
ci0 = tbl[i0];
if (ci.uops > 1)
{
if (i0 >= imin && ci0.uops == 1)
{
if (i >= tblmax)
{ if (i + 1 >= TBLMAX)
goto Lnoinsert;
tblmax = i + 1;
}
tbl[i0 + 2] = tbl[i0 + 1];
tbl[i0 + 1] = ci0;
i = i0;
goto Linsert;
}
break;
}
if (tbl[i0 + 1] && triple_test(*ci0,*tbl[i0 + 1],ci))
goto Linsert;
break;
default:
assert(0);
}
}
else
int i0 = (i / 3) * 3; // index of decode unit 0
Cinfo *ci0;
assert(((TBLMAX / 3) * 3) == TBLMAX);
switch (i - i0)
{
assert((TBLMAX & 1) == 0);
if (i & 1) // if V pipe
{
if (pair_test(*tbl[i - 1], *ci))
{
goto Linsert;
}
else if (i > imin && pair_test(*ci, *tbl[i - 1]))
{
L1:
tbl[i] = tbl[i - 1];
if (i >= tblmax)
tblmax = i + 1;
i--;
//printf("\tswapping with x%02x\n",tbl[i + 1].c.Iop);
goto Linsert;
}
}
else // will always fit in U pipe
{
assert(!tbl[i + 1]); // because V pipe should be empty
case 0: // i0 can handle any instruction
goto Linsert;
}
case 1:
ci0 = tbl[i0];
if (ci.uops > 1)
{
if (i0 >= imin && ci0.uops == 1)
{
tbl[i] = tbl[i - 1];
if (i >= tblmax)
tblmax = i + 1;
i--;
//printf("\tswapping with x%02x\n",tbl[i + 1].c.Iop);
goto Linsert;
}
i++;
break;
}
if (triple_test(*ci0,*ci,tbl[i0 + 2]))
goto Linsert;
break;
case 2:
ci0 = tbl[i0];
if (ci.uops > 1)
{
if (i0 >= imin && ci0.uops == 1)
{
if (i >= tblmax)
{ if (i + 1 >= TBLMAX)
goto Lnoinsert;
tblmax = i + 1;
}
tbl[i0 + 2] = tbl[i0 + 1];
tbl[i0 + 1] = ci0;
i = i0;
goto Linsert;
}
break;
}
if (tbl[i0 + 1] && triple_test(*ci0,*tbl[i0 + 1],ci))
goto Linsert;
break;
default:
assert(0);
}
}
}

View file

@ -725,8 +725,6 @@ void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t of
cs.Irex &= ~REX_W; // REX is ignored for PUSH anyway
}
}
else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
cs.Iflags |= CFwait;
L2:
getregs(cdb, desmsk); // save any regs we destroy
@ -3619,9 +3617,7 @@ void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
pop87();
pop87();
cdb.genfltreg(0xD9, 3, tysize(TYfloat));
genfwait(cdb);
cdb.genfltreg(0xD9, 3, 0);
genfwait(cdb);
// reload
if (config.exe == EX_WIN64)
{
@ -4145,11 +4141,9 @@ static if (0)
// reload real
push87(cdb);
cdb.genfltreg(0xD9, 0, 0);
genfwait(cdb);
// reload imaginary
push87(cdb);
cdb.genfltreg(0xD9, 0, tysize(TYfloat));
genfwait(cdb);
retregs = mST01;
}
@ -5035,7 +5029,6 @@ void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
}
if (LARGEDATA)
cdb.last().Iflags |= CFss; // want to store into stack
genfwait(cdb); // FWAIT
return;
}
else if (I16 && (tym == TYdouble || tym == TYdouble_alias))

View file

@ -1150,9 +1150,7 @@ static if (NTEXCEPTIONS)
pop87();
pop87();
cdb.genfltreg(0xD9, 3, tysize(TYfloat));
genfwait(cdb);
cdb.genfltreg(0xD9, 3, 0);
genfwait(cdb);
// reload
if (config.exe == EX_WIN64)
{

View file

@ -546,9 +546,6 @@ uint VEX3_B2(code.Svex ivex)
ivex.pp;
}
/************************************
 * Tell whether explicit FWAIT handling applies to the configured target.
 *
 * Returns: true when config.target_cpu compares <= TARGET_80286.
 *          The code generator uses the result to decide whether to emit
 *          an FWAIT opcode or set the CFwait flag on an instruction.
 */
@trusted
bool ADDFWAIT() { return config.target_cpu <= TARGET_80286; }
/************************************
*/

View file

@ -156,7 +156,6 @@ struct CodeBuilder
*/
void genf2(opcode_t op, uint rm)
{
// genfwait() appends an FWAIT ahead of the instruction, but only when
// ADDFWAIT() is true for the configured target
genfwait(this);
// The instruction itself: opcode plus rm byte, generated via gen2()
gen2(op, rm);
}
@ -346,8 +345,6 @@ struct CodeBuilder
{
floatreg = true;
reflocal = true;
if ((opcode & ~7) == 0xD8)
genfwait(this);
genc1(opcode,modregxrm(2,reg,BPRM),FLfltreg,offset);
}

View file

@ -495,22 +495,7 @@ void cv_init()
// Put out S_COMPILE record
TOWORD(debsym.ptr + 2,S_COMPILE);
switch (config.target_cpu)
{
case TARGET_8086: debsym[4] = 0; break;
case TARGET_80286: debsym[4] = 2; break;
case TARGET_80386: debsym[4] = 3; break;
case TARGET_80486: debsym[4] = 4; break;
case TARGET_Pentium:
case TARGET_PentiumMMX:
debsym[4] = 5; break;
case TARGET_PentiumPro:
case TARGET_PentiumII:
debsym[4] = 6; break;
default: assert(0);
}
debsym[4] = 6;
debsym[5] = (CPP != 0); // 0==C, 1==C++
flags = (config.inline8087) ? (0<<3) : (1<<3);
if (I32)