From 93d2e53e94e16c4e7616d4a00c3e1574ae1d0ed1 Mon Sep 17 00:00:00 2001 From: Walter Bright Date: Thu, 17 Apr 2025 01:35:25 -0700 Subject: [PATCH] fix offsets in prolog_saveregs/restoreregs (#21251) --- compiler/src/dmd/backend/arm/cod3.d | 10 +++++--- compiler/src/dmd/backend/x86/cgcod.d | 35 ++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/compiler/src/dmd/backend/arm/cod3.d b/compiler/src/dmd/backend/arm/cod3.d index 30f5b46a3a..e60f4dfba3 100644 --- a/compiler/src/dmd/backend/arm/cod3.d +++ b/compiler/src/dmd/backend/arm/cod3.d @@ -341,7 +341,8 @@ void genBranch(ref CodeBuilder cdb, COND cond, FL fltarg, block* targ) @trusted void prolog_saveregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topush, int cfa_offset) { - printf("prolog_saveregs() topush: %s pushoffuse: %d\n", regm_str(topush), cg.pushoffuse); + //printf("prolog_saveregs() topush: %s pushoffuse: %d\n", regm_str(topush), cg.pushoffuse); + //printf("function: %s\n", funcsym_p.Sident.ptr); assert(!(topush & ~fregsaved)); assert(cg.pushoffuse || !topush); @@ -349,6 +350,7 @@ void prolog_saveregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topush, int cfa int xmmtopush = 0; int gptopush = popcnt(topush); // general purpose registers to save targ_size_t gpoffset = cg.pushoff + cg.BPoff; + gpoffset += localsize; reg_t fp; // frame pointer if (!cg.hasframe || cg.enforcealign) { @@ -391,6 +393,7 @@ void prolog_saveregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topush, int cfa @trusted private void epilog_restoreregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topop) { + //printf("prolog_restoreregs() topop: %s\n", regm_str(topop)); assert(cg.AArch64); assert(cg.pushoffuse || !topop); @@ -399,6 +402,7 @@ private void epilog_restoreregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topo int xmmtopop = popcnt(topop & XMMREGS); // XMM regs take 16 bytes int gptopop = popcnt(topop); // general purpose registers to save targ_size_t gpoffset = cg.pushoff + cg.BPoff; + gpoffset += localsize; reg_t fp; if (!cg.hasframe || cg.enforcealign) @@ -416,7 +420,7 @@ private void epilog_restoreregs(ref CGstate cg, ref CodeBuilder cdb, regm_t topo const ins = (mask(reg) & INSTR.FLOATREGS) // https://www.scs.stanford.edu/~zyedidia/arm64/ldr_imm_fpsimd.html - ? INSTR.ldr_imm_fpsimd(3,0,cast(uint)gpoffset >> 3,fp,reg) // LDR reg,[fp,#offset] + ? INSTR.ldr_imm_fpsimd(3,1,cast(uint)gpoffset >> 3,fp,reg) // LDR reg,[fp,#offset] : INSTR.ldr_imm_gen(1, reg, fp, gpoffset); // LDR reg,[fp,#offset] cdb.gen1(ins); gpoffset += REGSIZE; @@ -705,7 +709,7 @@ void epilog(block* b) * order they were pushed. */ topop = fregsaved & ~cgstate.mfuncreg; -// epilog_restoreregs(cdbx, topop); // implement + epilog_restoreregs(cgstate, cdbx, topop); if (cgstate.usednteh & NTEHjmonitor) { diff --git a/compiler/src/dmd/backend/x86/cgcod.d b/compiler/src/dmd/backend/x86/cgcod.d index 873cb23546..d26ed9e298 100644 --- a/compiler/src/dmd/backend/x86/cgcod.d +++ b/compiler/src/dmd/backend/x86/cgcod.d @@ -795,13 +795,27 @@ else /* Instead of pushing the registers onto the stack one by one, * allocate space in the stack frame and copy/restore them there. */ - int xmmtopush = popcnt(topush & XMMREGS); // XMM regs take 16 bytes - int gptopush = popcnt(topush) - xmmtopush; // general purpose registers to save - if (cg.NDPoff || xmmtopush || cg.funcarg.size) + if (cg.AArch64) { - cg.pushoff = alignsection(cg.pushoff - (gptopush * REGSIZE + xmmtopush * 16), - xmmtopush ? STACKALIGN : REGSIZE, bias); - cg.pushoffuse = true; // tell others we're using this strategy + //printf("topush: %s\n", regm_str(topush)); + int numtopush = popcnt(topush); + if (numtopush || cg.funcarg.size) + { + cg.pushoff = alignsection(cg.pushoff - numtopush * REGSIZE, + REGSIZE, bias); + cg.pushoffuse = true; // tell others we're using this strategy + } + } + else + { + int xmmtopush = popcnt(topush & XMMREGS); // XMM regs take 16 bytes + int gptopush = popcnt(topush) - xmmtopush; // general purpose registers to save + if (cg.NDPoff || xmmtopush || cg.funcarg.size) + { + cg.pushoff = alignsection(cg.pushoff - (gptopush * REGSIZE + xmmtopush * 16), + xmmtopush ? STACKALIGN : REGSIZE, bias); + cg.pushoffuse = true; // tell others we're using this strategy + } } } @@ -823,8 +837,8 @@ else localsize = -cg.funcarg.offset; static if (0) - printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", - cast(long)cg.Alloca.offset, cast(long)CSE.size(), cast(long)cg.CSoff, cast(long)cg.NDPoff, cast(long)localsize); + printf("Alloca.offset: x%llx cstop: x%llx CSoff: x%llx NDPoff: x%llx pushoff: x%llx localsize: x%llx\n", + cast(long)cg.Alloca.offset, cast(long)CSE.size(), cast(long)cg.CSoff, cast(long)cg.NDPoff, cast(long)cg.pushoff, cast(long)localsize); assert(cast(targ_ptrdiff_t)localsize >= 0); // Keep the stack aligned by 8 for any subsequent function calls @@ -855,8 +869,8 @@ else cg.funcarg.offset = -localsize; static if (0) - printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", - cast(int)cg.Foff,cast(int)cg.Auto.size,cast(int)cg.NDPoff,cast(int)cg.CSoff,cast(int)cg.Para.size,cast(int)localsize); + printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x pushoff x%02x localsize x%02x\n", + cast(int)cg.Foff,cast(int)cg.Auto.size,cast(int)cg.NDPoff,cast(int)cg.CSoff,cast(int)cg.Para.size,cast(int)cg.pushoff,cast(int)localsize); uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals @@ -924,6 +938,7 @@ else } else if (cg.needframe) // if variables or parameters { + // xlocalsize can be adjusted for NTEXCEPTIONS==2 prolog_frame(cg, cdbx, farfunc, xlocalsize, enter, cfa_offset); cg.hasframe = true; }