implement cdpopcnt for AArch64 (#20787)

This commit is contained in:
Walter Bright 2025-01-27 06:33:15 -08:00 committed by GitHub
parent b289b6a6dc
commit 2ca7960029
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 35 additions and 43 deletions

View file

@ -1905,7 +1905,6 @@ void cdmsw(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
// cdbtst // cdbtst
// cdbt // cdbt
// cdbscan // cdbscan
// cdpopcnt
/************************ /************************
* OPpopcnt operator * OPpopcnt operator
@ -1922,51 +1921,33 @@ void cdpopcnt(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
return; return;
} }
/*
fmov d31, x0 9e67001f
cnt v31.8b, v31.8b 0e205bff
addv b31, v31.8b 0e31bbff
//umov w0, v0.b[0]
fmov w0,s31 1e2603e0
*/
if (e) assert(0);
const tyml = tybasic(e.E1.Ety); const tyml = tybasic(e.E1.Ety);
const sz = _tysize[tyml]; const sz = _tysize[tyml];
assert(sz == 2 || sz == 4 || (sz == 8 && I64)); // no byte op assert(sz == 8); // popcnt only operates on 64 bits
if (tyfloating(tyml))
code cs = void;
if ((e.E1.Eoper == OPind && !e.E1.Ecount) || e.E1.Eoper == OPvar)
{ {
getlvalue(cdb, cs, e.E1, 0, RM.load); // get addressing mode assert(0);
}
else
{
regm_t retregs = cgstate.allregs;
codelem(cgstate,cdb,e.E1, retregs, false);
const reg = findreg(retregs);
cs.Irm = modregrm(3,0,reg & 7);
cs.Iflags = 0;
cs.Irex = 0;
if (reg & 8)
cs.Irex |= REX_B;
} }
regm_t retregs = pretregs & cgstate.allregs; const posregs = cgstate.allregs;
if (!retregs) regm_t retregs1 = posregs;
retregs = cgstate.allregs; codelem(cgstate,cdb,e.E1,retregs1,false);
const reg = allocreg(cdb,retregs, e.Ety);
cs.Iop = POPCNT; // POPCNT reg,EA regm_t retregs = pretregs & cg.allregs;
code_newreg(&cs, reg); if (retregs == 0) /* if no return regs speced */
if (sz == SHORTSIZE) /* (like if wanted flags only) */
cs.Iflags |= CFopsize; retregs = ALLREGS & posregs; // give us some
if (pretregs & mPSW) reg_t Rd = allocreg(cdb, retregs, tyml); // destination register
cs.Iflags |= CFpsw;
cdb.gen(&cs); const R1 = findreg(retregs1); // source register
if (sz == 8)
code_orrex(cdb.last(), REX_W); regm_t vregs = ALLREGS; // floating point register
pretregs &= mBP | ALLREGS; // flags already set reg_t Vx = allocreg(cdb, vregs, TYdouble);
cdb.gen1(INSTR.fmov_float_gen(1,1,0,7,R1,Vx)); // FMOV Dx,X1
cdb.gen1(INSTR.cnt_advsimd(0,0,Vx,Vx)); // CNT Vx.8b,Vx.8b
cdb.gen1(INSTR.addv_advsimd(0,0,Vx,Vx)); // ADDV Bx,Vx.8b
cdb.gen1(INSTR.fmov_float_gen(0,0,0,6,Vx,Rd)); // FMOV Wd,Sx
fixresult(cdb,e,retregs,pretregs); fixresult(cdb,e,retregs,pretregs);
} }

View file

@ -1815,8 +1815,8 @@ void disassemble(uint c) @trusted
if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html if (U == 0 && size == 0 && opcode == 0x05) // https://www.scs.stanford.edu/~zyedidia/arm64/cnt_advsimd.html
{ {
p1 = "cnt"; // cnt <Vd>.<T>, <Vn>.<T> p1 = "cnt"; // cnt <Vd>.<T>, <Vn>.<T>
p2 = vregString(rbuf[0 .. 7], Rd, Q); p2 = vregString(rbuf[0 .. 7], Q, Rd);
p3 = vregString(rbuf[8 .. 14], Rn, Q); p3 = vregString(rbuf[8 .. 14], Q, Rn);
//printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr); //printf("p2: %.*s p3: %.*s\n", cast(int)p2.length, p2.ptr, cast(int)p3.length, p3.ptr);
} }
else if (U == 0 && (size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html else if (U == 0 && (size & 2) && opcode == 0x1B) // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_advsimd_int.html
@ -1945,6 +1945,11 @@ void disassemble(uint c) @trusted
p2 = regString(sf,Rd); p2 = regString(sf,Rd);
p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn); p3 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rn);
} }
else if (sf == 1 && ftype == 1 && rmode == 0 && opcode == 7)
{
p2 = fregString(rbuf[4 .. 8],"sd h"[ftype],Rd);
p3 = regString(sf,Rn);
}
} }
} }
else else

View file

@ -423,7 +423,8 @@ extern (C++) struct Target
extern (C++) void _init(ref const Param params) extern (C++) void _init(ref const Param params)
{ {
// isX86_64 and cpu are initialized in parseCommandLine // isX86_64 and cpu are initialized in parseCommandLine
isX86 = !isX86_64; //printf("isX86_64 %d isAArch64 %d\n", isX86_64, isAArch64);
isX86 = !isX86_64 && !isAArch64;
assert(isX86 + isX86_64 + isAArch64 == 1); // there can be only one assert(isX86 + isX86_64 + isAArch64 == 1); // there can be only one
this.params = &params; this.params = &params;

View file

@ -704,6 +704,11 @@ private int softPopcnt(N)(N x) pure
return cast(int) x; return cast(int) x;
} }
// Population count declaration for DigitalMars compilers targeting AArch64.
// Only the 64-bit (ulong) form is declared in this block, and no body is
// supplied here.
// NOTE(review): presumably the compiler treats this as an intrinsic and
// emits the instruction sequence itself - confirm against the backend.
version (DigitalMars) version (AArch64)
{
// Takes a 64-bit value and returns an int; by its name, the number of set
// bits in x.
int _popcnt(ulong x) pure;
}
version (DigitalMars) version (AnyX86) version (DigitalMars) version (AnyX86)
{ {
/** /**