1 /**
2  * Top level code for the code generator.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
11  */
12 
13 module dmd.backend.cgcod;
14 
15 version = FRAMEPTR;
16 
17 import core.bitop;
18 import core.stdc.stdio;
19 import core.stdc.stdlib;
20 import core.stdc.string;
21 
22 import dmd.backend.backend;
23 import dmd.backend.cc;
24 import dmd.backend.cdef;
25 import dmd.backend.code;
26 import dmd.backend.cgcse;
27 import dmd.backend.code_x86;
28 import dmd.backend.codebuilder;
29 import dmd.backend.disasm86;
30 import dmd.backend.dlist;
31 import dmd.backend.dvec;
32 import dmd.backend.melf;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.global;
36 import dmd.backend.obj;
37 import dmd.backend.oper;
38 import dmd.backend.pdata : win64_pdata;
39 import dmd.backend.rtlsym;
40 import dmd.backend.symtab;
41 import dmd.backend.ty;
42 import dmd.backend.type;
43 import dmd.backend.xmm;
44 
45 import dmd.backend.barray;
46 
47 
48 nothrow:
49 @safe:
50 
// Type of the comparison callback handed to qsort(): an extern(C), nothrow
// function taking two const void pointers and returning int.
alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
// Module-local extern(C) declaration of qsort() whose callback parameter uses
// the nothrow callback type above.
// NOTE(review): presumably redeclared here so it can be used with nothrow
// callbacks from this module's `nothrow:` code - confirm.
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

// Compile-time switch; tested with `static if (MARS)` in codgen().
enum MARS = true;
55 
56 import dmd.backend.dwarfdbginf : dwarf_except_gentables;
57 
// Global state of the code generator. Several of these (needframe,
// enforcealign, gotref, stackchanged, stackpush, refparam, calledafunc,
// retsym, floatreg, usednteh, calledFinally, regcon, msavereg, mfuncreg, ...)
// are re-initialized by codgen() at the start of every pass over a function.
__gshared
{
bool floatreg;                  // !=0 if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;              // set to 0 by prolog() before the frame layout is computed
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;                // register save area; reset by codgen() on every pass

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
                                 */
int refparam;           // !=0 if we referenced any parameters
int reflocal;           // !=0 if we referenced any locals
bool anyiasm;           // !=0 if any inline assembler
char calledafunc;       // !=0 if we called a function
char needframe;         // if true, then we will need the frame
                        // pointer (BP for the 8088)
char gotref;            // !=0 if the GOTsym was referenced
uint usednteh;              // if !=0, then used NT exception handling
bool calledFinally;     // true if called a BC_finally block

/* Register contents    */
con_t regcon;

BackendPass pass;       // pass codgen() is currently running
                        // (BackendPass.initial, .reg or .final_)

private Symbol *retsym;          // set to symbol that should be placed in
                                // register AX

/****************************
 * Register masks.
 */

regm_t msavereg;        // Mask of registers that we would like to save.
                        // they are temporaries (set by scodelem())
regm_t mfuncreg;        // Mask of registers preserved by a function

regm_t allregs;                // ALLREGS optionally including mBP

int dfoidx;                     /* which block we are in (index into dfo[],
                                   set by codgen() in optimized builds) */

targ_size_t     funcoffset;     // offset of start of function
targ_size_t     prolog_allocoffset;     // offset past adj of stack allocation
targ_size_t     startoffset;    // size of function entry code
targ_size_t     retoffset;      /* offset from start of func to ret code */
targ_size_t     retsize;        /* size of function return              */

// All cleared to 0 by codgen() at the start of every pass.
private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs;

}
136 
/*********************************
 * Generate code for a function.
 *
 * Code is generated for every block, possibly more than once: unless the
 * function contains inline assembler, each non-final pass is followed by a
 * call to cgreg_assign(), the generated code is thrown away, and the blocks
 * are generated again (see the `tryagain` label). After the last pass the
 * prolog and epilogs are added, block offsets are computed, jumps are
 * shortened where possible, and the code, the switch/jump tables and the
 * exception handling tables are written out.
 *
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 * Params:
 *      sfunc = function to generate code for; must be the current function
 *              (funcsym_p), and its segment must be the current code segment (cseg)
 */
@trusted
void codgen(Symbol *sfunc)
{
    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    // One-time (per function) initialization; everything after `tryagain`
    // is redone on every pass.
    cgreg_init();
    CSE.initialize();
    cod3_initregs();
    allregs = ALLREGS;
    pass = BackendPass.initial;
    Alloca.initialize();
    anyiasm = 0;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist,
         * so turn every BCexit block into a BCret block.
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

tryagain:
    // Start of a code generation pass. Re-entered via `goto tryagain` below
    // until pass == BackendPass.final_ (or inline assembler is present).
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
            (pass == BackendPass.initial) ? "init".ptr : ((pass == BackendPass.reg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.initialize();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;
    STACKALIGN = TARGET_STACKALIGN;     // may be raised below if a local needs more alignment

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    if (sfunc.Sfunc.Fflags3 & Fjmonitor &&
        config.exe & EX_windos)
        usednteh |= NTEHjmonitor;

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;                // number of BCret/BCretexp blocks
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    assert(!(needframe && mfuncreg & mBP)); // needframe needs mBP

    // Reset per-block state, and note inline assembler and return blocks
    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = 1;                // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    /* Compute regcon.params: the registers of SC.fastpar / SC.shadowreg
     * symbols (s.Spregm()), minus the registers (s.Sregm) of any parameter
     * symbol whose Sfl is FLreg. SFLread is cleared on every symbol along the way.
     */
    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        regm_t noparams = 0;
        foreach (s; globsym[])
        {
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SC.fastpar:
                case SC.shadowreg:
                    regcon.params |= s.Spregm();
                    goto case SC.parameter;

                case SC.parameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;       // mark function as never returning

        assert(dfo);

        cgreg_reset();
        foreach (i, b; dfo[])
        {
            dfoidx = cast(int)i;
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        // Not optimizing: a single, final pass over the blocks in Bnext order
        pass = BackendPass.final_;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != BackendPass.final_ &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = BackendPass.reg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = BackendPass.reg;
        else
            pass = BackendPass.final_;
        // Discard the code generated by this pass and generate it again
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    // See if we need to enforce a particular stack alignment
    foreach (s; globsym[])
    {
        if (Symbol_Sisdead(*s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SC.register:
            case SC.auto_:
            case SC.fastpar:
                if (s.Sfl == FLreg)             // lives in a register, not on the stack
                    break;

                // Raise STACKALIGN to the largest alignment any stack
                // variable asks for (only for I64 or EX_OSX targets)
                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(globsym, false);  // compute final offsets of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);   // running offset of the block being laid out

    if (eecontext.EEelem)
        genEEcode();

    // Finish each block's code (epilog, addresses, peephole, prolog,
    // scheduling) and lay the blocks out one after another from coffset
    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                // Append a line number record for the block's source
                // position before the epilog is added
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);         // do pinhole optimization
        if (b.Bflags & BFLprolog)      // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            // round coffset up to a multiple of Balign
            // (mask arithmetic; assumes Balign is a power of 2 - TODO confirm)
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    bool flag;                          // true if the last sweep saved any bytes
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {
                // This block shrank: recompute the offsets of all the
                // blocks that follow it, honoring their alignment
                b.Bsize -= i;
                auto offset = b.Boffset + b.Bsize;
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {
                        targ_size_t u = bn.Balign - 1;
                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        // Unless optimizing (or generating 16 bit code), do only one sweep
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    if (usednteh & NTEH_try)
    {
        // Do this before code is emitted because we patch some instructions
        nteh_filltables();
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;                    // -1: jump table segment not chosen yet
    if (config.flags & CFGromable)
    {
        // tables start at coffset, the end of the function's code
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        // Only the code in eecontext.EEcode is written out
        codout(sfunc.Sseg,eecontext.EEcode,null);
        code_free(eecontext.EEcode);
    }
    else
    {
        // Receives the emitted bytes for disassemble() when configv.vasm is set
        __gshared Barray!ubyte disasmBuf;
        disasmBuf.reset();

        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                // Reserve room for this block's table; the segment is
                // looked up on first use
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {   uint u = b.Balign;
                // number of bytes needed to bring the segment offset up to a multiple of u
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            // The layout computed above must match what is actually emitted
            assert(b.Boffset == Offset(sfunc.Sseg));

            codout(sfunc.Sseg,b.Bcode,configv.vasm ? &disasmBuf : null);   // output code
        }
        // The end of the emitted code must be where the layout said it would be
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        if (configv.vasm)
            disassemble(disasmBuf[]);                   // disassemble the code

        const nteh = usednteh & NTEH_try;
        if (nteh)
        {
            assert(!(config.flags & CFGromable));
            //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
            objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
        }

        // Write out switch tables
        // (retoffset is reassigned for every block, so the value left
        // after the loop is the one computed for the last block in the list)
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto default;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto default;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;

                    /* Add 3 bytes to retoffset in case we have an exception
                     * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                     * IT FIXES THE PROBLEM HERE AS WELL.
                     */
                    if (usednteh & NTEH_try)
                        retoffset += 3;
                    break;

                default:
                    // offset of the end of this block from the start of the function
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        // Generate the exception handling tables for whichever scheme is in use
        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // NOTE(review): the original comment here ("Do this before code
                // is emitted because we patch some instructions") duplicates the
                // one on nteh_filltables() above; at this point the code has
                // already been written out by codout().
                nteh_gentables(sfunc);
            }
            if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                // Fstartblock is only set for the duration of the call
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        // The code has been written out; release it
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    tym_t functy = tybasic(sfunc.ty());
    sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);

    debug
    if (global87.stackused != 0)
      printf("stackused = %d\n",global87.stackused);

    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    global87.save.dtor();       // clean up ndp save array
}
604 
/*********************************************
 * Move the offset of a stack section downward until it is aligned.
 *
 * The value that gets aligned is (-base + bias), i.e. the low bits of that
 * sum selected by (alignment - 1) are made zero by lowering base.
 * Params:
 *      base = negative (or zero) offset of the section from the frame pointer
 *      alignment = alignment to use; values larger than STACKALIGN are
 *                  reduced to STACKALIGN, and 0 leaves base untouched
 *      bias = difference between where the frame pointer points and the
 *             STACKALIGNed part of the stack
 * Returns:
 *      base, revised downward so it is aligned
 */
@trusted
targ_size_t alignsection(targ_size_t base, uint alignment, int bias)
{
    assert(cast(long)base <= 0);

    // A section is never aligned more strictly than the stack itself
    const uint effective = (alignment > STACKALIGN) ? STACKALIGN : alignment;
    if (effective == 0)
        return base;

    // Distance of the section from the aligned part of the stack
    const long distance = cast(long)(-base + bias);
    assert(distance >= 0);

    // Bytes by which that distance overshoots the previous boundary
    const long excess = distance & (effective - 1);
    if (excess != 0)
        base -= effective - excess;     // grow downward to the next boundary

    return base;
}
630 
631 /*******************************
632  * Generate code for a function start.
633  * Input:
634  *      Offset(cseg)         address of start of code
635  *      Auto.alignment
636  * Output:
637  *      Offset(cseg)         adjusted for size of code generated
638  *      EBPtoESP
639  *      hasframe
640  *      BPoff
641  */
642 @trusted
643 void prolog(ref CodeBuilder cdb)
644 {
645     bool enter;
646 
647     //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment);
648     debug debugw && printf("funcstart()\n");
649     regcon.immed.mval = 0;                      /* no values in registers yet   */
650     version (FRAMEPTR)
651         EBPtoESP = 0;
652     else
653         EBPtoESP = -REGSIZE;
654     hasframe = 0;
655     bool pushds = false;
656     BPoff = 0;
657     bool pushalloc = false;
658     tym_t tyf = funcsym_p.ty();
659     tym_t tym = tybasic(tyf);
660     const farfunc = tyfarfunc(tym) != 0;
661 
662     if (config.flags3 & CFG3ibt && !I16)
663         cdb.gen1(I32 ? ENDBR32 : ENDBR64);
664 
665     // Special Intel 64 bit ABI prolog setup for variadic functions
666     Symbol *sv64 = null;                        // set to __va_argsave
667     if (I64 && variadic(funcsym_p.Stype))
668     {
669         /* The Intel 64 bit ABI scheme.
670          * abi_sysV_amd64.pdf
671          * Load arguments passed in registers into the varargs save area
672          * so they can be accessed by va_arg().
673          */
674         /* Look for __va_argsave
675          */
676         for (SYMIDX si = 0; si < globsym.length; si++)
677         {
678             Symbol *s = globsym[si];
679             if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0)
680             {
681                 if (!(s.Sflags & SFLdead))
682                     sv64 = s;
683                 break;
684             }
685         }
686     }
687 
688     if (config.flags & CFGalwaysframe ||
689         funcsym_p.Sfunc.Fflags3 & Ffakeeh ||
690         /* The exception stack unwinding mechanism relies on the EBP chain being intact,
691          * so need frame if function can possibly throw
692          */
693         !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) ||
694         cgstate.accessedTLS ||
695         sv64
696        )
697         needframe = 1;
698 
699     CodeBuilder cdbx; cdbx.ctor();
700 
701 Lagain:
702     spoff = 0;
703     char guessneedframe = needframe;
704     int cfa_offset = 0;
705 //    if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)))
706 //      usednteh |= NTEHpassthru;
707 
708     /* Compute BP offsets for variables on stack.
709      * The organization is:
710      *  Para.size    parameters
711      * -------- stack is aligned to STACKALIGN
712      *          seg of return addr      (if far function)
713      *          IP of return addr
     *  BP->    caller's BP
715      *          DS                      (if Windows prolog/epilog)
716      *          exception handling context symbol
717      *  Fast.size fastpar
718      *  Auto.size    autos and regs
719      *  regsave.off  any saved registers
720      *  Foff    floating register
721      *  Alloca.size  alloca temporary
722      *  CSoff   common subs
723      *  NDPoff  any 8087 saved registers
724      *          monitor context record
725      *          any saved registers
726      */
727 
728     if (tym == TYifunc)
729         Para.size = 26; // how is this number derived?
730     else
731     {
732         version (FRAMEPTR)
733         {
734             bool frame = needframe || tyf & mTYnaked;
735             Para.size = ((farfunc ? 2 : 1) + frame) * REGSIZE;
736             if (frame)
737                 EBPtoESP = -REGSIZE;
738         }
739         else
740             Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE;
741     }
742 
743     /* The real reason for the FAST section is because the implementation of contracts
744      * requires a consistent stack frame location for the 'this' pointer. But if varying
745      * stuff in Auto.offset causes different alignment for that section, the entire block can
746      * shift around, causing a crash in the contracts.
747      * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their
748      * own FAST section, which is never aligned at a size bigger than REGSIZE, and so
749      * its alignment never shifts around.
750      * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned
751      * individually rather than as a group.
752      */
753     Fast.size = 0;
754     static if (NTEXCEPTIONS == 2)
755     {
756         Fast.size -= nteh_contextsym_size();
757         if (config.exe & EX_windos)
758         {
759             if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
760                 Fast.size -= 5 * 4;
761         }
762     }
763 
764     /* Despite what the comment above says, aligning Fast section to size greater
765      * than REGSIZE does not break contract implementation. Fast.offset and
766      * Fast.alignment must be the same for the overriding and
767      * the overridden function, since they have the same parameters. Fast.size
768      * must be the same because otherwise, contract inheritance wouldn't work
769      * even if we didn't align Fast section to size greater than REGSIZE. Therefore,
770      * the only way aligning the section could cause problems with contract
771      * inheritance is if bias (declared below) differed for the overridden
772      * and the overriding function.
773      *
774      * Bias depends on Para.size and needframe. The value of Para.size depends on
775      * whether the function is an interrupt handler and whether it is a farfunc.
776      * DMD does not have _interrupt attribute and D does not make a distinction
777      * between near and far functions, so Para.size should always be 2 * REGSIZE
778      * for D.
779      *
780      * The value of needframe depends on a global setting that is only set
781      * during backend's initialization and on function flag Ffakeeh. On Windows,
782      * that flag is always set for virtual functions, for which contracts are
783      * defined and on other platforms, it is never set. Because of that
     * the value of needframe should always be the same for the overridden
785      * and the overriding function, and so bias should be the same too.
786      */
787 
788 version (FRAMEPTR)
789     int bias = enforcealign ? 0 : cast(int)(Para.size);
790 else
791     int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE));
792 
793     if (Fast.alignment < REGSIZE)
794         Fast.alignment = REGSIZE;
795 
796     Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias);
797 
798     if (Auto.alignment < REGSIZE)
799         Auto.alignment = REGSIZE;       // necessary because localsize must be REGSIZE aligned
800     Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias);
801 
802     regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias);
803     //printf("regsave.off = x%x, size = x%x, alignment = %x\n",
804         //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment);
805 
806     if (floatreg)
807     {
808         uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE;
809         Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias);
810         //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize);
811     }
812     else
813         Foff = regsave.off;
814 
815     Alloca.alignment = REGSIZE;
816     Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias);
817 
818     CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias);
819     //printf("CSoff = x%x, size = x%x, alignment = %x\n",
820         //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment);
821 
822     NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias);
823 
824     regm_t topush = fregsaved & ~mfuncreg;          // mask of registers that need saving
825     pushoffuse = false;
826     pushoff = NDPoff;
827     /* We don't keep track of all the pushes and pops in a function. Hence,
828      * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder
829      * won't be setting ESP correctly. With pushoffuse, the registers are restored
830      * from EBP, which is kept track of properly.
831      */
832     if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64))
833     {
834         /* Instead of pushing the registers onto the stack one by one,
835          * allocate space in the stack frame and copy/restore them there.
836          */
837         int xmmtopush = popcnt(topush & XMMREGS);   // XMM regs take 16 bytes
838         int gptopush = popcnt(topush) - xmmtopush;  // general purpose registers to save
839         if (NDPoff || xmmtopush || cgstate.funcarg.size)
840         {
841             pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
842                     xmmtopush ? STACKALIGN : REGSIZE, bias);
843             pushoffuse = true;          // tell others we're using this strategy
844         }
845     }
846 
847     //printf("Fast.size = x%x, Auto.size = x%x\n", cast(int)Fast.size, cast(int)Auto.size);
848 
849     cgstate.funcarg.alignment = STACKALIGN;
850     /* If the function doesn't need the extra alignment, don't do it.
851      * Can expand on this by allowing for locals that don't need extra alignment
852      * and calling functions that don't need it.
853      */
854     if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64))
855     {
856         cgstate.funcarg.alignment = I64 ? 8 : 4;
857     }
858 
859     //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias);
860     cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias);
861 
862     localsize = -cgstate.funcarg.offset;
863 
864     //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n",
865         //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize);
866     assert(cast(targ_ptrdiff_t)localsize >= 0);
867 
    // Keep the stack aligned to STACKALIGN for any subsequent function calls
869     if (!I16 && calledafunc &&
870         (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
871     {
872         int npush = popcnt(topush);            // number of registers that need saving
873         npush += popcnt(topush & XMMREGS);     // XMM regs take 16 bytes, so count them twice
874         if (pushoffuse)
875             npush = 0;
876 
877         //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
878                //npush, Para.size, needframe, localsize);
879 
880         int sz = cast(int)(localsize + npush * REGSIZE);
881         if (!enforcealign)
882         {
883             version (FRAMEPTR)
884                 sz += Para.size;
885             else
886                 sz += Para.size + (needframe ? 0 : -REGSIZE);
887         }
888         if (sz & (STACKALIGN - 1))
889             localsize += STACKALIGN - (sz & (STACKALIGN - 1));
890     }
891     cgstate.funcarg.offset = -localsize;
892 
893     //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n",
894         //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize);
895 
896     uint xlocalsize = cast(uint)localsize;    // amount to subtract from ESP to make room for locals
897 
898     if (tyf & mTYnaked)                 // if no prolog/epilog for function
899     {
900         hasframe = 1;
901         return;
902     }
903 
904     if (tym == TYifunc)
905     {
906         prolog_ifunc(cdbx,&tyf);
907         hasframe = 1;
908         cdb.append(cdbx);
909         goto Lcont;
910     }
911 
912     /* Determine if we need BP set up   */
913     if (enforcealign)
914     {
915         // we need BP to reset the stack before return
916         // otherwise the return address is lost
917         needframe = 1;
918     }
919     else if (config.flags & CFGalwaysframe)
920         needframe = 1;
921     else
922     {
923         if (localsize)
924         {
925             if (I16 ||
926                 !(config.flags4 & CFG4speed) ||
927                 config.target_cpu < TARGET_Pentium ||
928                 farfunc ||
929                 config.flags & CFGstack ||
930                 xlocalsize >= 0x1000 ||
931                 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
932                 anyiasm ||
933                 Alloca.size
934                )
935             {
936                 needframe = 1;
937             }
938         }
939         if (refparam && (anyiasm || I16))
940             needframe = 1;
941     }
942 
943     if (needframe)
944     {
945         assert(mfuncreg & mBP);         // shouldn't have used mBP
946 
947         if (!guessneedframe)            // if guessed wrong
948             goto Lagain;
949     }
950 
951     if (I16 && config.wflags & WFwindows && farfunc)
952     {
953         prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds);
954         enter = false;                  // don't use ENTER instruction
955         hasframe = 1;                   // we have a stack frame
956     }
957     else if (needframe)                 // if variables or parameters
958     {
959         prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset);
960         hasframe = 1;
961     }
962 
963     /* Align the stack if necessary */
964     prolog_stackalign(cdbx);
965 
966     /* Subtract from stack pointer the size of the local stack frame
967      */
968     if (config.flags & CFGstack)        // if stack overflow check
969     {
970         prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
971         if (Alloca.size)
972             prolog_setupalloca(cdbx);
973     }
974     else if (needframe)                      /* if variables or parameters   */
975     {
976         if (xlocalsize)                 /* if any stack offset          */
977         {
978             prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
979             if (Alloca.size)
980                 prolog_setupalloca(cdbx);
981         }
982         else
983             assert(Alloca.size == 0);
984     }
985     else if (xlocalsize)
986     {
987         assert(I32 || I64);
988         prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc);
989         version (FRAMEPTR) { } else
990             BPoff += REGSIZE;
991     }
992     else
993         assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor));
994     EBPtoESP += xlocalsize;
995     if (hasframe)
996         EBPtoESP += REGSIZE;
997 
998     /* Win64 unwind needs the amount of code generated so far
999      */
1000     if (config.exe == EX_WIN64)
1001     {
1002         code *c = cdbx.peek();
1003         pinholeopt(c, null);
1004         prolog_allocoffset = calcblksize(c);
1005     }
1006 
1007     if (usednteh & NTEHjmonitor)
1008     {   Symbol *sthis;
1009 
1010         for (SYMIDX si = 0; 1; si++)
1011         {   assert(si < globsym.length);
1012             sthis = globsym[si];
1013             if (strcmp(sthis.Sident.ptr,"this".ptr) == 0)
1014                 break;
1015         }
1016         nteh_monitor_prolog(cdbx,sthis);
1017         EBPtoESP += 3 * 4;
1018     }
1019 
1020     cdb.append(cdbx);
1021     prolog_saveregs(cdb, topush, cfa_offset);
1022 
1023 Lcont:
1024 
1025     if (config.exe == EX_WIN64)
1026     {
1027         if (variadic(funcsym_p.Stype))
1028             prolog_gen_win64_varargs(cdb);
1029         prolog_loadparams(cdb, tyf, pushalloc);
1030         return;
1031     }
1032 
1033     prolog_ifunc2(cdb, tyf, tym, pushds);
1034 
1035     static if (NTEXCEPTIONS == 2)
1036     {
1037         if (usednteh & NTEH_except)
1038             nteh_setsp(cdb, 0x89);            // MOV __context[EBP].esp,ESP
1039     }
1040 
1041     // Load register parameters off of the stack. Do not use
1042     // assignaddr(), as it will replace the stack reference with
1043     // the register!
1044     prolog_loadparams(cdb, tyf, pushalloc);
1045 
1046     if (sv64)
1047         prolog_genvarargs(cdb, sv64);
1048 
1049     /* Alignment checks
1050      */
1051     //assert(Auto.alignment <= STACKALIGN);
1052     //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0);
1053 }
1054 
/************************************
 * Ordering callback passed to qsort() when laying out auto symbols.
 * Params:
 *      ps1 = address of the first `Symbol*` array slot being compared
 *      ps2 = address of the second `Symbol*` array slot being compared
 * Returns:
 *      < 0     s1 goes farther from frame pointer
 *      > 0     s1 goes nearer the frame pointer
 *      = 0     no difference
 */

@trusted
extern (C) int autosort_cmp(scope const void *ps1, scope const void *ps2)
{
    Symbol *lhs = *cast(Symbol **)ps1;
    Symbol *rhs = *cast(Symbol **)ps2;

    /* Primary key: alignment. The larger alignment sorts first (negative
     * result), i.e. farthest from the frame pointer, so it is allocated first.
     */
    const uint lhsAlign = Symbol_Salignsize(*lhs);
    const uint rhsAlign = Symbol_Salignsize(*rhs);
    if (lhsAlign != rhsAlign)
        return lhsAlign < rhsAlign ? 1 : -1;

    /* Secondary key: Sweight. Higher weights sort later, i.e. nearer the
     * frame pointer, because the addressing mode is fewer bytes. Grouping
     * high Sweight variables together may also put them in the same cache
     */
    if (lhs.Sweight != rhs.Sweight)
        return lhs.Sweight < rhs.Sweight ? -1 : 1;

    /* More:
     * 1. put static arrays nearest the frame pointer, so buffer overflows
     *    can't change other variable contents
     * 2. Do the coloring at the byte level to minimize stack usage
     */
    return 0;
}
1096 
/******************************
 * Compute stack frame offsets for local variables
 * that did not make it into registers.
 *
 * Resets the Para, Fast, Auto and EEStack sections, then walks the symbol
 * table and assigns `Soffset` for each symbol that needs stack storage,
 * advancing the offset (and, for Fast/Auto, the alignment) of the section
 * the symbol belongs to.
 *
 * When `estimate` is set and CFG4optimized is on, SC.auto_/SC.register
 * symbols are collected first, sorted with autosort_cmp(), and then laid out
 * in a second pass that lets symbols with disjoint live ranges share a slot.
 *
 * Params:
 *      symtab = function's symbol table
 *      estimate = true for do estimate only, false for final
 */
@trusted
void stackoffsets(ref symtab_t symtab, bool estimate)
{
    //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr);

    Para.initialize();        // parameter offset
    Fast.initialize();        // SCfastpar offset
    Auto.initialize();        // automatic & register offset
    EEStack.initialize();     // for SCstack's

    // Set if doing optimization of auto layout
    bool doAutoOpt = estimate && config.flags4 & CFG4optimized;

    // Put autos in another array so we can do optimizations on the stack layout.
    // Small symbol tables use the on-stack buffer, larger ones a malloc'd one.
    Symbol*[10] autotmp = void;
    Symbol **autos = null;
    if (doAutoOpt)
    {
        if (symtab.length <= autotmp.length)
            autos = autotmp.ptr;
        else
        {   autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof);
            assert(autos);
        }
    }
    size_t autosi = 0;  // number used in autos[]

    for (int si = 0; si < symtab.length; si++)
    {   Symbol *s = symtab[si];

        /* Don't allocate space for dead or zero size parameters
         */
        switch (s.Sclass)
        {
            case SC.fastpar:
                if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
                    goto Ldefault;   // don't need consistent stack frame
                break;

            case SC.parameter:
                if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
                {
                    Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
                    s.Soffset = Para.offset;
                    continue;
                }
                break;          // allocate even if it's dead

            case SC.shadowreg:
                break;          // allocate even if it's dead

            default:
            Ldefault:
                if (Symbol_Sisdead(*s, anyiasm))
                    continue;       // don't allocate space
                break;
        }

        targ_size_t sz = type_size(s.Stype);
        if (sz == 0)
            sz++;               // can't handle 0 length structs

        uint alignsize = Symbol_Salignsize(*s);
        if (alignsize > STACKALIGN)
            alignsize = STACKALIGN;         // no point if the stack is less aligned

        //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread);
        assert(cast(int)sz >= 0);

        switch (s.Sclass)
        {
            case SC.fastpar:
                /* Get these
                 * right next to the stack frame pointer, EBP.
                 * Needed so we can call nested contract functions
                 * frequire and fensure.
                 */
                if (s.Sfl == FLreg)        // if allocated in register
                    continue;
                /* Needed because storing fastpar's on the stack in prolog()
                 * does the entire register
                 */
                if (sz < REGSIZE)
                    sz = REGSIZE;

                Fast.offset = _align(sz,Fast.offset);
                s.Soffset = Fast.offset;
                Fast.offset += sz;
                //printf("fastpar '%s' sz = %d, fast offset =  x%x, %p\n", s.Sident, cast(int) sz, cast(int) s.Soffset, s);

                if (alignsize > Fast.alignment)
                    Fast.alignment = alignsize;
                break;

            case SC.register:
            case SC.auto_:
                if (s.Sfl == FLreg)        // if allocated in register
                    break;

                if (doAutoOpt)
                {   autos[autosi++] = s;    // deal with later
                    break;
                }

                Auto.offset = _align(sz,Auto.offset);
                s.Soffset = Auto.offset;
                Auto.offset += sz;
                //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident,sz, cast(long) s.Soffset);

                if (alignsize > Auto.alignment)
                    Auto.alignment = alignsize;
                break;

            case SC.stack:
                EEStack.offset = _align(sz,EEStack.offset);
                s.Soffset = EEStack.offset;
                //printf("EEStack.offset =  x%lx\n",cast(long)s.Soffset);
                EEStack.offset += sz;
                break;

            case SC.shadowreg:
            case SC.parameter:
                if (config.exe == EX_WIN64)
                {
                    // Win64: every parameter occupies one 8 byte slot
                    assert((Para.offset & 7) == 0);
                    s.Soffset = Para.offset;
                    Para.offset += 8;
                    break;
                }
                /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
                 * but are 4 byte aligned on the OSX 32 stack.
                 */
                Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
                if (alignsize >= 16 &&
                    (I64 || (config.exe == EX_OSX &&
                         (tyaggregate(s.ty()) || tyvector(s.ty())))))
                    Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
                s.Soffset = Para.offset;
                //printf("%s param offset =  x%lx, alignsize = %d\n", s.Sident, cast(long) s.Soffset, cast(int) alignsize);
                Para.offset += (s.Sflags & SFLdouble)
                            ? type_size(tstypes[TYdouble])   // float passed as double
                            : type_size(s.Stype);
                break;

            case SC.pseudo:
            case SC.static_:
            case SC.bprel:
                break;
            default:
                symbol_print(s);
                assert(0);
        }
    }

    if (autosi)
    {
        qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);

        // Bit i is set when autos[i] owns a slot that later symbols may share
        vec_t tbl = vec_calloc(autosi);

    Lnextauto:
        for (size_t si = 0; si < autosi; si++)
        {
            Symbol *s = autos[si];

            targ_size_t sz = type_size(s.Stype);
            if (sz == 0)
                sz++;               // can't handle 0 length structs

            uint alignsize = Symbol_Salignsize(*s);
            if (alignsize > STACKALIGN)
                alignsize = STACKALIGN;         // no point if the stack is less aligned

            /* See if we can share storage with another variable
             * if their live ranges do not overlap.
             */
            if (// Don't share because could stomp on variables
                // used in finally blocks
                !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
                s.Srange && !(s.Sflags & SFLspill))
            {
                for (size_t i = 0; i < si; i++)
                {
                    if (!vec_testbit(i,tbl))
                        continue;
                    Symbol *sp = autos[i];
//printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
                    if (vec_disjoint(s.Srange,sp.Srange) &&
                        !(sp.Soffset & (alignsize - 1)) &&
                        sz <= type_size(sp.Stype))
                    {
                        // The slot is now live over both ranges
                        vec_or(sp.Srange,sp.Srange,s.Srange);
                        //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
                        s.Soffset = sp.Soffset;
                        continue Lnextauto;     // s needs no storage of its own
                    }
                }
            }
            Auto.offset = _align(sz,Auto.offset);
            s.Soffset = Auto.offset;
            //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident, sz, cast(long) s.Soffset);
            Auto.offset += sz;
            if (s.Srange && !(s.Sflags & SFLspill))
                vec_setbit(si,tbl);

            if (alignsize > Auto.alignment)
                Auto.alignment = alignsize;
        }

        vec_free(tbl);
    }

    /* Release the heap buffer even when no auto was deferred (autosi == 0);
     * it is allocated whenever doAutoOpt is set and the symbol table does
     * not fit in autotmp.
     */
    if (autos && autos != autotmp.ptr)
        free(autos);
}
1319 
/****************************
 * Generate code for a block.
 *
 * Sets up the global `regcon` state from the block's predecessors, emits any
 * spill load/store code, calls outblkexitcode() to generate the block, and
 * stores the result in `bl.Bcode`. On exit the resulting register state is
 * recorded in `bl.Bregcon` and the BFLreflocal / BFLrefparam flags are set
 * on the block if `reflocal` / `refparam` became nonzero while generating it.
 *
 * Params:
 *      bl = block to generate code for
 */

@trusted
private void blcodgen(block *bl)
{
    regm_t mfuncregsave = mfuncreg;     // value on entry, handed to outblkexitcode()

    //dbg_printf("blcodgen(%p)\n",bl);

    /* Determine existing immediate values in registers by ANDing
        together the values from all the predecessors of b.
     */
    assert(bl.Bregcon.immed.mval == 0);
    regcon.immed.mval = 0;      // assume no previous contents in registers
//    regcon.cse.mval = 0;
    foreach (bpl; ListRange(bl.Bpred))
    {
        block *bp = list_block(bpl);

        if (bpl == bl.Bpred)
        {   // First predecessor: start from its state
            regcon.immed = bp.Bregcon.immed;
            regcon.params = bp.Bregcon.params;
//          regcon.cse = bp.Bregcon.cse;
        }
        else
        {
            // Subsequent predecessors: keep only what they all agree on
            int i;

            regcon.params &= bp.Bregcon.params;
            if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
                // Actual values must match, too
                for (i = 0; i < REGMAX; i++)
                {
                    if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
                        regcon.immed.mval &= ~mask(i);
                }
        }
    }
    regcon.cse.mops &= regcon.cse.mval;

    // Set regcon.mvar according to what variables are in registers for this block
    CodeBuilder cdb; cdb.ctor();
    regcon.mvar = 0;
    regcon.mpvar = 0;
    regcon.indexregs = 1;
    int anyspill = 0;           // 1 + index of the last spilled symbol live in this block
    char *sflsave = null;       // saved Sfl of each globsym entry, restored after codegen
    if (config.flags4 & CFG4optimized)
    {
        CodeBuilder cdbload; cdbload.ctor();
        CodeBuilder cdbstore; cdbstore.ctor();

        // alloca: the buffer only has to live until this function returns
        sflsave = cast(char *) alloca(globsym.length * char.sizeof);
        for (SYMIDX i = 0; i < globsym.length; i++)
        {
            Symbol *s = globsym[i];

            sflsave[i] = s.Sfl;
            // NOTE(review): this test has no effect; its only statement is commented out
            if (regParamInPreg(s) &&
                regcon.params & s.Spregm() &&
                vec_testbit(dfoidx,s.Srange))
            {
//                regcon.used |= s.Spregm();
            }

            if (s.Sfl == FLreg)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    regcon.mvar |= s.Sregm;
                    if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                        regcon.mpvar |= s.Sregm;
                }
            }
            else if (s.Sflags & SFLspill)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    anyspill = cast(int)(i + 1);
                    cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
                    if (vec_testbit(dfoidx,s.Slvreg))
                    {
                        // Treat the symbol as a register variable for this block;
                        // its register can no longer hold a CSE, immediate or parameter
                        s.Sfl = FLreg;
                        regcon.mvar |= s.Sregm;
                        regcon.cse.mval &= ~s.Sregm;
                        regcon.immed.mval &= ~s.Sregm;
                        regcon.params &= ~s.Sregm;
                        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                            regcon.mpvar |= s.Sregm;
                    }
                }
            }
        }
        // Save any CSE operands whose registers were just taken away
        if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
        {
            cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
        }
        cdb.append(cdbstore);
        cdb.append(cdbload);
        mfuncreg &= ~regcon.mvar;               // use these registers
        regcon.used |= regcon.mvar;

        // Determine if we have more than 1 uncommitted index register
        regcon.indexregs = IDXREGS & ~regcon.mvar;
        regcon.indexregs &= regcon.indexregs - 1;   // clears the lowest set bit: nonzero only if 2+ remain
    }

    /* This doesn't work when calling the BC_finally function,
     * as it is one block calling another.
     */
    //regsave.idx = 0;

    // Track local/parameter references made by this block alone
    reflocal = 0;
    int refparamsave = refparam;
    refparam = 0;
    assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);

    outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
    bl.Bcode = cdb.finish();

    // Only entries below anyspill need restoring
    for (int i = 0; i < anyspill; i++)
    {
        Symbol *s = globsym[i];
        s.Sfl = sflsave[i];    // undo block register assignments
    }

    if (reflocal)
        bl.Bflags |= BFLreflocal;
    if (refparam)
        bl.Bflags |= BFLrefparam;
    refparam |= refparamsave;   // fold the value from before this block back in
    // Record the register state at block exit
    bl.Bregcon.immed = regcon.immed;
    bl.Bregcon.cse = regcon.cse;
    bl.Bregcon.used = regcon.used;
    bl.Bregcon.params = regcon.params;

    debug
    debugw && printf("code gen complete\n");
}
1461 
/******************************
 * Given a register mask, find and return the number
 * of the first register that fits.
 *
 * Convenience overload: forwards to findreg(regm, line, file).
 * Note that `__LINE__` and `__FILE__` are written in this body, so the
 * location passed on is this wrapper's, not the caller's.
 * Params:
 *      regm = mask of candidate registers
 * Returns:
 *      whatever the three-argument overload returns for `regm`
 */

@trusted
reg_t findreg(regm_t regm)
{
    return findreg(regm, __LINE__, __FILE__);
}
1472 
/******************************
 * Find the lowest-numbered register present in a register mask.
 * Params:
 *      regm = mask of candidate registers; must have at least one bit set
 *      line = source line of the request, reported on failure
 *      file = source file of the request, reported on failure
 * Returns:
 *      index of the least significant set bit of `regm`.
 *      If `regm` is 0 the function does not return: it prints the request
 *      (debug builds only), flushes stdout and asserts.
 */
@trusted
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    // bsf() yields the index of the lowest set bit; its result is undefined
    // for 0, so the empty mask is handled separately below.
    if (regm)
        return cast(reg_t) bsf(regm);

    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regm),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}
1502 
/***************
 * Free element (but not its leaves! (assume they are already freed))
 *
 * Each call consumes one count from `Ecomsub`. `Ecount` is deliberately
 * left untouched so that it can still be detected whether the common
 * subexpression has already been evaluated.
 * Once `Ecomsub` was already 0 on entry and the elem is a CSE, the common
 * subexpression is not required anymore: every register recorded as holding
 * it is released and the elem is removed from the CSE table.
 * Params:
 *      e = element being freed
 */

@trusted
void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Usage count: decrement unconditionally, act on the value seen before it
    const remaining = e.Ecomsub;
    --e.Ecomsub;
    if (remaining)
        return;

    if (!e.Ecount)                      // it was never a CSE
        return;

    foreach (i, held; regcon.cse.value)
    {
        if (held != e)                  // this register is not holding it
            continue;

        const regm_t regbit = mask(cast(uint)i);
        regcon.cse.mval &= ~regbit;
        regcon.cse.mops &= ~regbit;     // free masks
    }
    CSE.remove(e);
}
1530 
/*********************************
 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
 *
 * Walks the tree rooted at `e`, setting `Ecomsub` back to `Ecount` on
 * every node visited.
 * Params:
 *      e = root of the elem tree to reset
 */

@trusted
private void resetEcomsub(elem *e)
{
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;

        const oper = e.Eoper;
        if (OTleaf(oper))
            return;                     // nothing below a leaf

        // Recurse into the right operand, loop on the left one
        if (OTbinary(oper))
            resetEcomsub(e.EV.E2);
        e = e.EV.E1;
    }
}
1553 
/*********************************
 * Determine if elem e is a register variable.
 * Params:
 *      e = elem to examine
 *      pregm = set to mask of registers that make up the variable, otherwise not changed
 *      preg = set to the register selected for the variable (for FLreg symbols the
 *             most significant register when the elem's offset is REGSIZE, else the
 *             least significant one), otherwise not changed
 * Returns:
 *      true if register variable
 */

@trusted
bool isregvar(elem *e, ref regm_t pregm, ref reg_t preg)
{
    elem_debug(e);

    // Only OPvar and OPrelconst elems are considered
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol* sym = e.EV.Vsym;
    if (sym.Sfl == FLreg)
    {
        if (sym.Sclass == SC.parameter)
        {
            refparam = true;
            reflocal = true;
        }
        reg_t chosen = e.EV.Voffset == REGSIZE ? sym.Sregmsw : sym.Sreglsw;
        regm_t symregs = sym.Sregm;
        //assert(tyreg(sym.ty()));
static if (0)
{
        // Let's just see if there is a CSE in a reg we can use
        // instead. This helps avoid AGI's.
        if (e.Ecount && e.Ecount != e.Ecomsub)
        {
            foreach (i; 0 .. arraysize(regcon.cse.value))
            {
                if (regcon.cse.value[i] == e)
                {   chosen = i;
                    break;
                }
            }
        }
}
        // The symbol's registers must be a non-empty subset of the register variables
        assert(symregs & regcon.mvar && !(symregs & ~regcon.mvar));
        preg = chosen;
        pregm = symregs;
        return true;
    }
    else if (sym.Sfl == FLpseudo)
    {
        const uint regnum = sym.Sreglsw;
        const regm_t regbit = mask(regnum);
        if (regbit & ALLREGS && (regnum & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H
        {
            preg = regnum & 7;
            pregm = regbit;
            return true;
        }
    }
    return false;
}
1621 
/*********************************
 * Allocate some registers.
 *
 * This overload forwards to the six-argument allocreg(), supplying
 * `__LINE__` and `__FILE__` as written here, so the location recorded
 * is this wrapper's rather than the caller's.
 * Params:
 *      cdb             Code builder passed through to the allocator.
 *                      NOTE(review): presumably receives any code needed to save
 *                      regcon.cse.mops on the stack; confirm against the full overload.
 *      pretregs        Pointer to mask of registers to make selection from.
 *                      On output, *pretregs is the mask of allocated registers.
 *      preg            On output, *preg is the register number of the first
 *                      allocated register.
 *      tym             Mask of type we will store in registers.
 * Output:
 *      msavereg,mfuncreg       retregs bits are cleared.
 *      regcon.cse.mval,regcon.cse.mops updated
 * Returns:
 *      nothing; results are delivered through `pretregs` and `preg`.
 */

void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}
1641 
1642 @trusted
1643 void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
1644         ,int line,const(char)* file)
1645 {
1646         reg_t reg;
1647 
1648 static if (0)
1649 {
1650         if (pass == BackendPass.final_)
1651         {
1652             printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym %s\n",
1653                 file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
1654                 regm_str(msavereg),regm_str(*pretregs),tym_str(tym));
1655         }
1656 }
1657         tym = tybasic(tym);
1658         uint size = _tysize[tym];
1659         *pretregs &= mES | allregs | XMMREGS;
1660         regm_t retregs = *pretregs;
1661 
1662         debug if (retregs == 0)
1663             printf("allocreg: file %s(%d)\n", file, line);
1664 
1665         if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
1666         {
1667             if (size <= REGSIZE || (retregs & XMMREGS))
1668             {
1669                 *preg = findreg(retregs);
1670                 assert(retregs == mask(*preg)); /* no more bits are set */
1671             }
1672             else if (size <= 2 * REGSIZE)
1673             {
1674                 *preg = findregmsw(retregs);
1675                 assert(retregs & mLSW);
1676             }
1677             else
1678                 assert(0);
1679             getregs(cdb,retregs);
1680             return;
1681         }
1682         int count = 0;
1683 L1:
1684         //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
1685         assert(++count < 20);           /* fail instead of hanging if blocked */
1686         assert(retregs);
1687         reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
1688 L3:
1689         //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
1690         regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
1691         if (!r)
1692         {
1693             r = retregs & ~(msavereg | regcon.cse.mval);
1694             if (!r)
1695             {
1696                 r = retregs & ~(msavereg | regcon.cse.mops);
1697                 if (!r)
1698                 {   r = retregs & ~msavereg;
1699                     if (!r)
1700                         r = retregs;
1701                 }
1702             }
1703         }
1704 
1705         if (size <= REGSIZE || retregs & XMMREGS)
1706         {
1707             if (r & ~mBP)
1708                 r &= ~mBP;
1709 
1710             // If only one index register, prefer to not use LSW registers
1711             if (!regcon.indexregs && r & ~mLSW)
1712                 r &= ~mLSW;
1713 
1714             if (pass == BackendPass.final_ && r & ~lastretregs && !I16)
1715             {   // Try not to always allocate the same register,
1716                 // to schedule better
1717 
1718                 r &= ~lastretregs;
1719                 if (r & ~last2retregs)
1720                 {
1721                     r &= ~last2retregs;
1722                     if (r & ~last3retregs)
1723                     {
1724                         r &= ~last3retregs;
1725                         if (r & ~last4retregs)
1726                         {
1727                             r &= ~last4retregs;
1728 //                          if (r & ~last5retregs)
1729 //                              r &= ~last5retregs;
1730                         }
1731                     }
1732                 }
1733                 if (r & ~mfuncreg)
1734                     r &= ~mfuncreg;
1735             }
1736             reg = findreg(r);
1737             retregs = mask(reg);
1738         }
1739         else if (size <= 2 * REGSIZE)
1740         {
1741             /* Select pair with both regs free. Failing */
1742             /* that, select pair with one reg free.             */
1743 
1744             if (r & mBP)
1745             {
1746                 retregs &= ~mBP;
1747                 goto L3;
1748             }
1749 
1750             if (r & mMSW)
1751             {
1752                 if (r & mDX)
1753                     msreg = DX;                 /* prefer to use DX over CX */
1754                 else
1755                     msreg = findregmsw(r);
1756                 r &= mLSW;                      /* see if there's an LSW also */
1757                 if (r)
1758                     lsreg = findreg(r);
1759                 else if (lsreg == NOREG)   /* if don't have LSW yet */
1760                 {
1761                     retregs &= mLSW;
1762                     goto L3;
1763                 }
1764             }
1765             else
1766             {
1767                 if (I64 && !(r & mLSW))
1768                 {
1769                     retregs = *pretregs & (mMSW | mLSW);
1770                     assert(retregs);
1771                     goto L1;
1772                 }
1773                 lsreg = findreglsw(r);
1774                 if (msreg == NOREG)
1775                 {
1776                     retregs &= mMSW;
1777                     assert(retregs);
1778                     goto L3;
1779                 }
1780             }
1781             reg = (msreg == ES) ? lsreg : msreg;
1782             retregs = mask(msreg) | mask(lsreg);
1783         }
1784         else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
1785         {
1786             debug
1787             if (retregs != DOUBLEREGS)
1788                 printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));
1789 
1790             assert(retregs == DOUBLEREGS);
1791             reg = AX;
1792         }
1793         else
1794         {
1795             debug
1796             {
1797                 printf("%s\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
1798                     tym_str(tym),file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
1799             }
1800             assert(0);
1801         }
1802         if (retregs & regcon.mvar)              // if conflict with reg vars
1803         {
1804             if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
1805             {
1806                 retregs = (*pretregs &= ~(retregs & regcon.mvar));
1807                 goto L1;                // try other registers
1808             }
1809         }
1810         *preg = reg;
1811         *pretregs = retregs;
1812 
1813         //printf("Allocating %s\n",regm_str(retregs));
1814         last5retregs = last4retregs;
1815         last4retregs = last3retregs;
1816         last3retregs = last2retregs;
1817         last2retregs = lastretregs;
1818         lastretregs = retregs;
1819         getregs(cdb, retregs);
1820 }
1821 
1822 
1823 /*****************************************
1824  * Allocate a scratch register.
1825  * Params:
1826  *      cdb = where to write any generated code to
1827  *      regm = mask of registers to pick one from
1828  * Returns:
1829  *      selected register
1830  */
1831 @trusted
1832 reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
1833 {
1834     reg_t r;
1835     allocreg(cdb, &regm, &r, TYoffset);
1836     return r;
1837 }
1838 
1839 
1840 /******************************
1841  * Determine registers that should be destroyed upon arrival
1842  * to code entry point for exception handling.
1843  */
1844 @trusted
1845 regm_t lpadregs()
1846 {
1847     regm_t used;
1848     if (config.ehmethod == EHmethod.EH_DWARF)
1849         used = allregs & ~mfuncreg;
1850     else
1851         used = (I32 | I64) ? allregs : (ALLREGS | mES);
1852     //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg));
1853     return used;
1854 }
1855 
1856 
1857 /*************************
1858  * Mark registers as used.
1859  */
1860 
1861 @trusted
1862 void useregs(regm_t regm)
1863 {
1864     //printf("useregs(x%x) %s\n", regm, regm_str(regm));
1865     mfuncreg &= ~regm;
1866     regcon.used |= regm;                // registers used in this block
1867     regcon.params &= ~regm;
1868     if (regm & regcon.mpvar)            // if modified a fastpar register variable
1869         regcon.params = 0;              // toss them all out
1870 }
1871 
1872 /*************************
1873  * We are going to use the registers in mask r.
1874  * Generate any code necessary to save any regs.
1875  */
1876 
1877 @trusted
1878 void getregs(ref CodeBuilder cdb, regm_t r)
1879 {
1880     //printf("getregs(x%x) %s\n", r, regm_str(r));
1881     regm_t ms = r & regcon.cse.mops;           // mask of common subs we must save
1882     useregs(r);
1883     regcon.cse.mval &= ~r;
1884     msavereg &= ~r;                     // regs that are destroyed
1885     regcon.immed.mval &= ~r;
1886     if (ms)
1887         cse_save(cdb, ms);
1888 }
1889 
1890 /*************************
1891  * We are going to use the registers in mask r.
1892  * Same as getregs(), but assert if code is needed to be generated.
1893  */
1894 @trusted
1895 void getregsNoSave(regm_t r)
1896 {
1897     //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));
1898     assert(!(r & regcon.cse.mops));            // mask of common subs we must save
1899     useregs(r);
1900     regcon.cse.mval &= ~r;
1901     msavereg &= ~r;                     // regs that are destroyed
1902     regcon.immed.mval &= ~r;
1903 }
1904 
1905 /*****************************************
1906  * Copy registers in cse.mops into memory.
1907  */
1908 
1909 @trusted
1910 private void cse_save(ref CodeBuilder cdb, regm_t ms)
1911 {
1912     assert((ms & regcon.cse.mops) == ms);
1913     regcon.cse.mops &= ~ms;
1914 
1915     /* Skip CSEs that are already saved */
1916     for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1)
1917     {
1918         if (regm & ms)
1919         {
1920             const e = regcon.cse.value[findreg(regm)];
1921             const sz = tysize(e.Ety);
1922             foreach (const ref cse; CSE.filter(e))
1923             {
1924                 if (sz <= REGSIZE ||
1925                     sz <= 2 * REGSIZE &&
1926                         (regm & mMSW && cse.regm & mMSW ||
1927                          regm & mLSW && cse.regm & mLSW) ||
1928                     sz == 4 * REGSIZE && regm == cse.regm
1929                    )
1930                 {
1931                     ms &= ~regm;
1932                     if (!ms)
1933                         return;
1934                     break;
1935                 }
1936             }
1937         }
1938     }
1939 
1940     while (ms)
1941     {
1942         auto cse = CSE.add();
1943         reg_t reg = findreg(ms);          /* the register to save         */
1944         cse.e = regcon.cse.value[reg];
1945         cse.regm = mask(reg);
1946 
1947         ms &= ~mask(reg);           /* turn off reg bit in ms       */
1948 
1949         // If we can simply reload the CSE, we don't need to save it
1950         if (cse_simple(&cse.csimple, cse.e))
1951             cse.flags |= CSEsimple;
1952         else
1953         {
1954             CSE.updateSizeAndAlign(cse.e);
1955             gen_storecse(cdb, cse.e.Ety, reg, cse.slot);
1956             reflocal = true;
1957         }
1958     }
1959 }
1960 
1961 /******************************************
1962  * Getregs without marking immediate register values as gone.
1963  */
1964 
1965 @trusted
1966 void getregs_imm(ref CodeBuilder cdb, regm_t r)
1967 {
1968     regm_t save = regcon.immed.mval;
1969     getregs(cdb,r);
1970     regcon.immed.mval = save;
1971 }
1972 
1973 /******************************************
1974  * Flush all CSE's out of registers and into memory.
1975  * Input:
1976  *      do87    !=0 means save 87 registers too
1977  */
1978 
1979 @trusted
1980 void cse_flush(ref CodeBuilder cdb, int do87)
1981 {
1982     //dbg_printf("cse_flush()\n");
1983     cse_save(cdb,regcon.cse.mops);      // save any CSEs to memory
1984     if (do87)
1985         save87(cdb);    // save any 8087 temporaries
1986 }
1987 
1988 /*************************
1989  * Common subexpressions exist in registers. Note this in regcon.cse.mval.
1990  * Input:
1991  *      e       the subexpression
1992  *      regm    mask of registers holding it
1993  *      opsflag if != 0 then regcon.cse.mops gets set too
1994  * Returns:
1995  *      false   not saved as a CSE
1996  *      true    saved as a CSE
1997  */
1998 
1999 @trusted
2000 bool cssave(elem *e,regm_t regm,uint opsflag)
2001 {
2002     bool result = false;
2003 
2004     /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
2005     if (e.Ecount && e.Ecomsub)
2006     {
2007         if (!opsflag && pass != BackendPass.final_ && (I32 || I64))
2008             return false;
2009 
2010         //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
2011         regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */
2012 
2013 /+
2014         /* Do not register CSEs if they are register variables and      */
2015         /* are not operator nodes. This forces the register allocation  */
2016         /* to go through allocreg(), which will prevent using register  */
2017         /* variables for scratch.                                       */
2018         if (opsflag || !(regm & regcon.mvar))
2019 +/
2020             for (uint i = 0; regm; i++)
2021             {
2022                 regm_t mi = mask(i);
2023                 if (regm & mi)
2024                 {
2025                     regm &= ~mi;
2026 
2027                     // If we don't need this CSE, and the register already
2028                     // holds a CSE that we do need, don't mark the new one
2029                     if (regcon.cse.mval & mi && regcon.cse.value[i] != e &&
2030                         !opsflag && regcon.cse.mops & mi)
2031                         continue;
2032 
2033                     regcon.cse.mval |= mi;
2034                     if (opsflag)
2035                         regcon.cse.mops |= mi;
2036                     //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e);
2037                     regcon.cse.value[i] = e;
2038                     result = true;
2039                 }
2040             }
2041     }
2042     return result;
2043 }
2044 
2045 /*************************************
2046  * Determine if a computation should be done into a register.
2047  */
2048 
2049 @trusted
2050 bool evalinregister(elem *e)
2051 {
2052     if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
2053         return true;
2054 
2055     if (e.Ecount == 0)             /* elem is not a CSE, therefore */
2056                                     /* we don't need to evaluate it */
2057                                     /* in a register                */
2058         return false;
2059     if (!OTleaf(e.Eoper))          /* operators are always in register */
2060         return true;
2061 
2062     // Need to rethink this code if float or double can be CSE'd
2063     uint sz = tysize(e.Ety);
2064     if (e.Ecount == e.Ecomsub)    /* elem is a CSE that needs     */
2065                                     /* to be generated              */
2066     {
2067         if ((I32 || I64) &&
2068             //pass == BackendPass.final_ && // bug 8987
2069             sz <= REGSIZE)
2070         {
2071             // Do it only if at least 2 registers are available
2072             regm_t m = allregs & ~regcon.mvar;
2073             if (sz == 1)
2074                 m &= BYTEREGS;
2075             if (m & (m - 1))        // if more than one register
2076             {   // Need to be at least 3 registers available, as
2077                 // addressing modes can use up 2.
2078                 while (!(m & 1))
2079                     m >>= 1;
2080                 m >>= 1;
2081                 if (m & (m - 1))
2082                     return true;
2083             }
2084         }
2085         return false;
2086     }
2087 
2088     /* Elem is now a CSE that might have been generated. If so, and */
2089     /* it's in a register already, the computation should be done   */
2090     /* using that register.                                         */
2091     regm_t emask = 0;
2092     for (uint i = 0; i < regcon.cse.value.length; i++)
2093         if (regcon.cse.value[i] == e)
2094             emask |= mask(i);
2095     emask &= regcon.cse.mval;       // mask of available CSEs
2096     if (sz <= REGSIZE)
2097         return emask != 0;      /* the CSE is in a register     */
2098     else if (sz <= 2 * REGSIZE)
2099         return (emask & mMSW) && (emask & mLSW);
2100     return true;                    /* cop-out for now              */
2101 }
2102 
2103 /*******************************************************
2104  * Return mask of scratch registers.
2105  */
2106 
2107 @trusted
2108 regm_t getscratch()
2109 {
2110     regm_t scratch = 0;
2111     if (pass == BackendPass.final_)
2112     {
2113         scratch = allregs & ~(regcon.mvar | regcon.mpvar | regcon.cse.mval |
2114                   regcon.immed.mval | regcon.params | mfuncreg);
2115     }
2116     return scratch;
2117 }
2118 
2119 /******************************
2120  * Evaluate an elem that is a common subexp that has been encountered
2121  * before.
2122  * Look first to see if it is already in a register.
2123  * Params:
2124  *      cdb = sink for generated code
2125  *      e = the elem
2126  *      pretregs = input is mask of registers, output is result register
2127  */
2128 
2129 @trusted
2130 private void comsub(ref CodeBuilder cdb,elem *e, ref regm_t pretregs)
2131 {
2132     tym_t tym;
2133     regm_t regm,emask;
2134     reg_t reg;
2135     uint byte_,sz;
2136 
2137     //printf("comsub(e = %p, pretregs = %s)\n",e,regm_str(pretregs));
2138     elem_debug(e);
2139 
2140     debug
2141     {
2142         if (e.Ecomsub > e.Ecount)
2143             elem_print(e);
2144     }
2145 
2146     assert(e.Ecomsub <= e.Ecount);
2147 
2148     if (pretregs == 0)        // no possible side effects anyway
2149     {
2150         return;
2151     }
2152 
2153     /* First construct a mask, emask, of all the registers that
2154      * have the right contents.
2155      */
2156     emask = 0;
2157     for (uint i = 0; i < regcon.cse.value.length; i++)
2158     {
2159         //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
2160         if (regcon.cse.value[i] == e)   // if contents are right
2161                 emask |= mask(i);       // turn on bit for reg
2162     }
2163     emask &= regcon.cse.mval;                     // make sure all bits are valid
2164 
2165     if (emask & XMMREGS && pretregs == mPSW)
2166         { }
2167     else if (tyxmmreg(e.Ety) && config.fpxmmregs)
2168     {
2169         if (pretregs & (mST0 | mST01))
2170         {
2171             regm_t retregs = pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
2172             comsub(cdb, e, retregs);
2173             fixresult(cdb,e,retregs,&pretregs);
2174             return;
2175         }
2176     }
2177     else if (tyfloating(e.Ety) && config.inline8087)
2178     {
2179         comsub87(cdb,e,&pretregs);
2180         return;
2181     }
2182 
2183 
2184     /* create mask of CSEs */
2185     regm_t csemask = CSE.mask(e);
2186     csemask &= ~emask;            // stuff already in registers
2187 
2188     debug if (debugw)
2189     {
2190         printf("comsub(e=%p): pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
2191                 e,regm_str(pretregs),regm_str(emask),regm_str(csemask),
2192                 regm_str(regcon.cse.mval),regm_str(regcon.mvar));
2193         if (regcon.cse.mval & 1)
2194             elem_print(regcon.cse.value[0]);
2195     }
2196 
2197     tym = tybasic(e.Ety);
2198     sz = _tysize[tym];
2199     byte_ = sz == 1;
2200 
2201     if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
2202     {
2203         /* First see if it is already in a correct register     */
2204 
2205         regm = emask & pretregs;
2206         if (regm == 0)
2207             regm = emask;               /* try any other register       */
2208         if (regm)                       /* if it's in a register        */
2209         {
2210             if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (pretregs & regcon.mvar) == pretregs)
2211             {
2212                 regm = mask(findreg(regm));
2213                 fixresult(cdb,e,regm,&pretregs);
2214                 return;
2215             }
2216         }
2217 
2218         if (OTleaf(e.Eoper))                  /* if not op or func            */
2219             goto reload;                      /* reload data                  */
2220 
2221         foreach (ref cse; CSE.filter(e))
2222         {
2223             regm_t retregs;
2224 
2225             if (cse.flags & CSEsimple)
2226             {
2227                 retregs = pretregs;
2228                 if (byte_ && !(retregs & BYTEREGS))
2229                     retregs = BYTEREGS;
2230                 else if (!(retregs & allregs))
2231                     retregs = allregs;
2232                 allocreg(cdb,&retregs,&reg,tym);
2233                 code *cr = &cse.csimple;
2234                 cr.setReg(reg);
2235                 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
2236                     cr.Irex |= REX;
2237                 cdb.gen(cr);
2238                 goto L10;
2239             }
2240             else
2241             {
2242                 reflocal = true;
2243                 cse.flags |= CSEload;
2244                 if (pretregs == mPSW)  // if result in CCs only
2245                 {
2246                     if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
2247                     {
2248                         retregs = XMMREGS;
2249                         allocreg(cdb,&retregs,&reg,tym);
2250                         gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2251                         regcon.cse.mval |= mask(reg); // cs is in a reg
2252                         regcon.cse.value[reg] = e;
2253                         fixresult(cdb,e,retregs,&pretregs);
2254                     }
2255                     else
2256                     {
2257                         // CMP cs[BP],0
2258                         gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
2259                     }
2260                 }
2261                 else
2262                 {
2263                     retregs = pretregs;
2264                     if (byte_ && !(retregs & BYTEREGS))
2265                         retregs = BYTEREGS;
2266                     allocreg(cdb,&retregs,&reg,tym);
2267                     gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2268                 L10:
2269                     regcon.cse.mval |= mask(reg); // cs is in a reg
2270                     regcon.cse.value[reg] = e;
2271                     fixresult(cdb,e,retregs,&pretregs);
2272                 }
2273             }
2274             return;
2275         }
2276 
2277         debug
2278         {
2279             printf("couldn't find cse e = %p, pass = %d\n",e,pass);
2280             elem_print(e);
2281         }
2282         assert(0);                      /* should have found it         */
2283     }
2284     else                                  /* reg pair is req'd            */
2285     if (sz <= 2 * REGSIZE)
2286     {
2287         reg_t msreg,lsreg;
2288 
2289         /* see if we have both  */
2290         if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
2291         {                               /* we don't have both           */
2292             debug if (!OTleaf(e.Eoper))
2293             {
2294                 printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
2295                     e,e.Eoper,regm_str(emask),regm_str(csemask));
2296                 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
2297                 elem_print(e);
2298             }
2299 
2300             assert(OTleaf(e.Eoper));        /* must have both for operators */
2301             goto reload;
2302         }
2303 
2304         /* Look for right vals in any regs      */
2305         regm = pretregs & mMSW;
2306         if (emask & regm)
2307             msreg = findreg(emask & regm);
2308         else if (emask & mMSW)
2309             msreg = findregmsw(emask);
2310         else                    /* reload from cse array        */
2311         {
2312             if (!regm)
2313                 regm = mMSW & ALLREGS;
2314             allocreg(cdb,&regm,&msreg,TYint);
2315             loadcse(cdb,e,msreg,mMSW);
2316         }
2317 
2318         regm = pretregs & (mLSW | mBP);
2319         if (emask & regm)
2320             lsreg = findreg(emask & regm);
2321         else if (emask & (mLSW | mBP))
2322             lsreg = findreglsw(emask);
2323         else
2324         {
2325             if (!regm)
2326                 regm = mLSW;
2327             allocreg(cdb,&regm,&lsreg,TYint);
2328             loadcse(cdb,e,lsreg,mLSW | mBP);
2329         }
2330 
2331         regm = mask(msreg) | mask(lsreg);       /* mask of result       */
2332         fixresult(cdb,e,regm,&pretregs);
2333         return;
2334     }
2335     else if (tym == TYdouble || tym == TYdouble_alias)    // double
2336     {
2337         assert(I16);
2338         if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
2339         {
2340             static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
2341             for (reg = 0; reg != NOREG; reg = dblreg[reg])
2342             {
2343                 assert(cast(int) reg >= 0 && reg <= 7);
2344                 if (mask(reg) & csemask)
2345                     loadcse(cdb,e,reg,mask(reg));
2346             }
2347             regm = DOUBLEREGS_16;
2348             fixresult(cdb,e,regm,&pretregs);
2349             return;
2350         }
2351         if (OTleaf(e.Eoper)) goto reload;
2352 
2353         debug
2354         printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));
2355 
2356         assert(0);
2357     }
2358     else
2359     {
2360         debug
2361         printf("e = %p, tym = x%x\n",e,tym);
2362 
2363         assert(0);
2364     }
2365 
2366 reload:                                 /* reload result from memory    */
2367     switch (e.Eoper)
2368     {
2369         case OPrelconst:
2370             cdrelconst(cdb,e,&pretregs);
2371             break;
2372 
2373         case OPgot:
2374             if (config.exe & EX_posix)
2375             {
2376                 cdgot(cdb,e,&pretregs);
2377                 break;
2378             }
2379             goto default;
2380 
2381         default:
2382             if (pretregs == mPSW &&
2383                 config.fpxmmregs &&
2384                 (tyxmmreg(tym) || tysimd(tym)))
2385             {
2386                 regm_t retregs = XMMREGS | mPSW;
2387                 loaddata(cdb,e,&retregs);
2388                 cssave(e,retregs,false);
2389                 return;
2390             }
2391             loaddata(cdb,e,&pretregs);
2392             break;
2393     }
2394     cssave(e,pretregs,false);
2395 }
2396 
2397 
2398 /*****************************
2399  * Load reg from cse save area on stack.
2400  */
2401 
2402 @trusted
2403 private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
2404 {
2405     foreach (ref cse; CSE.filter(e))
2406     {
2407         //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm));
2408         if (cse.regm & regm)
2409         {
2410             reflocal = true;
2411             cse.flags |= CSEload;    /* it was loaded        */
2412             regcon.cse.value[reg] = e;
2413             regcon.cse.mval |= mask(reg);
2414             getregs(cdb,mask(reg));
2415             gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2416             return;
2417         }
2418     }
2419     debug
2420     {
2421         printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
2422         elem_print(e);
2423     }
2424     assert(0);
2425 }
2426 
2427 /***************************
2428  * Generate code sequence for an elem.
2429  * Input:
2430  *      pretregs =      mask of possible registers to return result in
2431  *                      Note:   longs are in AX,BX or CX,DX or SI,DI
2432  *                              doubles are AX,BX,CX,DX only
2433  *      constflag =     1 for user of result will not modify the
2434  *                      registers returned in *pretregs.
2435  *                      2 for freenode() not called.
2436  * Output:
2437  *      *pretregs       mask of registers result is returned in
2438  * Returns:
2439  *      pointer to code sequence generated
2440  */
2441 
2442 @trusted
2443 void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
2444 {
2445     (*cdxxx[op])(cdb,e,pretregs);
2446 }
2447 
/* Jump table of code generator routines, indexed by operator (OPxxx).
 * callcdxxx() dispatches through it. Every routine has the signature
 * (ref CodeBuilder, elem*, regm_t*).
 * Many operators map to cderr.
 * NOTE(review): presumably cderr marks operators that should never reach
 * the code generator, and operators with no entry here are left at the
 * default (null) function pointer - confirm.
 */
private __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
[
    OPunde:    &cderr,
    OPadd:     &cdorth,
    OPmul:     &cdmul,
    OPand:     &cdorth,
    OPmin:     &cdorth,
    OPnot:     &cdnot,
    OPcom:     &cdcom,
    OPcond:    &cdcond,
    OPcomma:   &cdcomma,
    OPremquo:  &cddiv,
    OPdiv:     &cddiv,
    OPmod:     &cddiv,
    OPxor:     &cdorth,
    OPstring:  &cderr,
    OPrelconst: &cdrelconst,
    OPinp:     &cdport,
    OPoutp:    &cdport,
    OPasm:     &cdasm,
    OPinfo:    &cdinfo,
    OPdctor:   &cddctor,
    OPddtor:   &cdddtor,
    OPctor:    &cdctor,
    OPdtor:    &cddtor,
    OPmark:    &cdmark,
    OPvoid:    &cdvoid,
    OPhalt:    &cdhalt,
    OPnullptr: &cderr,
    OPpair:    &cdpair,
    OPrpair:   &cdpair,

    // bitwise or, logical operators, shifts and rotates
    OPor:      &cdorth,
    OPoror:    &cdloglog,
    OPandand:  &cdloglog,
    OProl:     &cdshift,
    OPror:     &cdshift,
    OPshl:     &cdshift,
    OPshr:     &cdshift,
    OPashr:    &cdshift,
    OPbit:     &cderr,
    OPind:     &cdind,
    OPaddr:    &cderr,
    OPneg:     &cdneg,
    OPuadd:    &cderr,
    OPabs:     &cdabs,
    OPtoprec:  &cdtoprec,
    OPsqrt:    &cdneg,
    OPsin:     &cdneg,
    OPcos:     &cdneg,
    OPscale:   &cdscale,
    OPyl2x:    &cdscale,
    OPyl2xp1:  &cdscale,
    OPcmpxchg:     &cdcmpxchg,
    OPrint:    &cdneg,
    OPrndtol:  &cdrndtol,
    OPstrlen:  &cdstrlen,
    OPstrcpy:  &cdstrcpy,
    OPmemcpy:  &cdmemcpy,
    OPmemset:  &cdmemset,
    OPstrcat:  &cderr,
    OPstrcmp:  &cdstrcmp,
    OPmemcmp:  &cdmemcmp,
    OPsetjmp:  &cdsetjmp,
    OPnegass:  &cdaddass,
    OPpreinc:  &cderr,
    OPpredec:  &cderr,
    OPstreq:   &cdstreq,
    OPpostinc: &cdpost,
    OPpostdec: &cdpost,
    OPeq:      &cdeq,
    OPaddass:  &cdaddass,
    OPminass:  &cdaddass,
    OPmulass:  &cdmulass,
    OPdivass:  &cddivass,
    OPmodass:  &cddivass,
    OPshrass:  &cdshass,
    OPashrass: &cdshass,
    OPshlass:  &cdshass,
    OPandass:  &cdaddass,
    OPxorass:  &cdaddass,
    OPorass:   &cdaddass,

    // relational operators
    OPle:      &cdcmp,
    OPgt:      &cdcmp,
    OPlt:      &cdcmp,
    OPge:      &cdcmp,
    OPeqeq:    &cdcmp,
    OPne:      &cdcmp,

    // further comparison operators, all handled by cdcmp
    OPunord:   &cdcmp,
    OPlg:      &cdcmp,
    OPleg:     &cdcmp,
    OPule:     &cdcmp,
    OPul:      &cdcmp,
    OPuge:     &cdcmp,
    OPug:      &cdcmp,
    OPue:      &cdcmp,
    OPngt:     &cdcmp,
    OPnge:     &cdcmp,
    OPnlt:     &cdcmp,
    OPnle:     &cdcmp,
    OPord:     &cdcmp,
    OPnlg:     &cdcmp,
    OPnleg:    &cdcmp,
    OPnule:    &cdcmp,
    OPnul:     &cdcmp,
    OPnuge:    &cdcmp,
    OPnug:     &cdcmp,
    OPnue:     &cdcmp,

    // pointer conversions
    OPvp_fp:   &cdcnvt,
    OPcvp_fp:  &cdcnvt,
    OPoffset:  &cdlngsht,
    OPnp_fp:   &cdshtlng,
    OPnp_f16p: &cdfar16,
    OPf16p_np: &cdfar16,

    // integer and floating point conversions
    OPs16_32:  &cdshtlng,
    OPu16_32:  &cdshtlng,
    OPd_s32:   &cdcnvt,
    OPb_8:     &cdcnvt,
    OPs32_d:   &cdcnvt,
    OPd_s16:   &cdcnvt,
    OPs16_d:   &cdcnvt,
    OPd_u16:   &cdcnvt,
    OPu16_d:   &cdcnvt,
    OPd_u32:   &cdcnvt,
    OPu32_d:   &cdcnvt,
    OP32_16:   &cdlngsht,
    OPd_f:     &cdcnvt,
    OPf_d:     &cdcnvt,
    OPd_ld:    &cdcnvt,
    OPld_d:    &cdcnvt,
    OPc_r:     &cdconvt87,
    OPc_i:     &cdconvt87,
    OPu8_16:   &cdbyteint,
    OPs8_16:   &cdbyteint,
    OP16_8:    &cdlngsht,
    OPu32_64:  &cdshtlng,
    OPs32_64:  &cdshtlng,
    OP64_32:   &cdlngsht,
    OPu64_128: &cdshtlng,
    OPs64_128: &cdshtlng,
    OP128_64:  &cdlngsht,
    OPmsw:     &cdmsw,

    OPd_s64:   &cdcnvt,
    OPs64_d:   &cdcnvt,
    OPd_u64:   &cdcnvt,
    OPu64_d:   &cdcnvt,
    OPld_u64:  &cdcnvt,
    OPparam:   &cderr,
    OPsizeof:  &cderr,
    OParrow:   &cderr,
    OParrowstar: &cderr,
    OPcolon:   &cderr,
    OPcolon2:  &cderr,
    OPbool:    &cdnot,
    OPcall:    &cdfunc,
    OPucall:   &cdfunc,
    OPcallns:  &cdfunc,
    OPucallns: &cdfunc,
    OPstrpar:  &cderr,
    OPstrctor: &cderr,
    OPstrthis: &cdstrthis,
    OPconst:   &cderr,
    OPvar:     &cderr,
    OPnew:     &cderr,
    OPanew:    &cderr,
    OPdelete:  &cderr,
    OPadelete: &cderr,
    OPbrack:   &cderr,
    OPframeptr: &cdframeptr,
    OPgot:     &cdgot,

    // bit scan and bit test operators
    OPbsf:     &cdbscan,
    OPbsr:     &cdbscan,
    OPbtst:    &cdbtst,
    OPbt:      &cdbt,
    OPbtc:     &cdbt,
    OPbtr:     &cdbt,
    OPbts:     &cdbt,

    OPbswap:   &cdbswap,
    OPpopcnt:  &cdpopcnt,
    OPvector:  &cdvector,
    OPvecsto:  &cdvecsto,
    OPvecfill: &cdvecfill,
    OPva_start: &cderr,
    OPprefetch: &cdprefetch,
];
2641 
2642 
/*********************************
 * Generate code for elem `e`, appending it to `cdb`.
 *
 * If `e` is a common subexpression that is being revisited
 * (`Ecount != 0 && Ecount != Ecomsub`) the work is handed to comsub().
 * Otherwise the elem is dispatched on its operator: OPrelconst, OPvar and
 * OPconst are handled directly, everything else goes through the `cdxxx[]`
 * table of generator functions.
 *
 * Params:
 *      cdb = receives the generated code
 *      e = elem to generate code for; must not be null
 *      pretregs = mask of registers the result may be returned in.
 *                 It is adjusted in place (see the body) and passed on
 *                 to the generator that is selected.
 *      constflag = bit flags
 *                  1: when clear, registers in regcon.mvar are removed from
 *                     *pretregs; when set, an OPvar living in a register may
 *                     have *pretregs narrowed to that register
 *                  2: when set, freenode(e) is not called on exit
 */
@trusted
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    // Must come first: the debug trace below already dereferences e
    assert(e);

    Symbol *s;

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs),oper_str(e.Eoper));
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    elem_debug(e);

    // Invariant: regcon.cse.mops must be a subset of regcon.cse.mval
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    // Only strip the register variables if that still leaves some register to use
    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    // Remember the operator now; it is still needed after the generators have run
    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e, *pretregs);
        goto L1;
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        // Evaluate into XMM register(s), record that as the CSE,
                        // then let fixresult() move it to what the caller asked for
                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    // Ask for a register class matching the type so cssave()
                    // below has a register result to record
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            assert(op <= OPMAX);
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            // A register variable already sitting in an acceptable register:
            // narrow *pretregs to that register (keeping the mPSW request)
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                // elem fits in one register but the symbol occupies two: use the LSW only
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            // No result requested, but the leaf is volatile or has Ecount >= 3:
            // request a register class suitable for its type
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs), oper_str(op));
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}
2793 
/*******************************
 * Same as codelem(), but do not destroy the registers in keepmsk.
 * Use scratch registers as much as possible, then use stack.
 *
 * The code for `e` is first generated into a private CodeBuilder. Any
 * register in `keepmsk` that this code turned out to modify is then
 * bracketed with save code (emitted before) and restore code (emitted after).
 *
 * Params:
 *      cdb = receives save code, the code for `e`, then restore code
 *      e = elem to generate code for
 *      pretregs = mask of acceptable result registers, see codelem()
 *      keepmsk = registers whose contents must survive the evaluation of `e`
 *      constflag = true if user of result will not modify the
 *                  registers returned in *pretregs.
 */

@trusted
void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
{
    regm_t touse;

    debug if (debugw)
        printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    elem_debug(e);
    if (constflag)
    {
        regm_t regm;
        reg_t reg;

        // Fast path: result already lives in an acceptable register variable,
        // so no code for e itself is needed
        if (isregvar(e, regm, reg) &&           // if e is a register variable
            (regm & *pretregs) == regm &&       // in one of the right regs
            e.EV.Voffset == 0
           )
        {
            uint sz1 = tysize(e.Ety);
            uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
            // elem is no wider than a register but the symbol is: keep only the low part
            if (sz1 <= REGSIZE && sz2 > REGSIZE)
                regm &= mLSW | XMMREGS;
            fixresult(cdb,e,regm,pretregs);
            cssave(e,regm,0);
            freenode(e);

            debug if (debugw)
                printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                        e,regm_str(*pretregs),regm_str(keepmsk),constflag);

            return;
        }
    }

    // Bits of keepmsk that were already in msavereg must stay there on exit
    regm_t overlap = msavereg & keepmsk;
    msavereg |= keepmsk;          /* add to mask of regs to save          */
    regm_t oldregcon = regcon.cse.mval;
    regm_t oldregimmed = regcon.immed.mval;
    regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */

    // Start from "every non-variable register untouched"; bits that are clear
    // after codelem() returns identify the registers the generated code used.
    mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
    uint stackpushsave = stackpush;
    char calledafuncsave = calledafunc;
    calledafunc = 0;
    CodeBuilder cdbx; cdbx.ctor();
    codelem(cdbx,e,pretregs,constflag);    // generate code for the elem

    // Registers that must be kept AND were modified by the code just generated.
    // This has to be tested against mfuncreg: msavereg had keepmsk OR'ed into
    // it above, so `keepmsk & ~msavereg` would always be 0 and nothing would
    // ever be saved.
    regm_t tosave = keepmsk & ~mfuncreg; /* registers to save                    */
    if (tosave)
    {
        cgstate.stackclean++;
        genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
        cgstate.stackclean--;
    }

    /* Assert that no new CSEs are generated that are not reflected       */
    /* in mfuncreg.                                                       */
    debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
        printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
                regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));

    assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);

    /* https://issues.dlang.org/show_bug.cgi?id=3521
     * The problem is:
     *    reg op (reg = exp)
     * where reg must be preserved (in keepregs) while the expression to be evaluated
     * must change it.
     * The only solution is to make this variable not a register.
     */
    if (regcon.mvar & tosave)
    {
        //elem_print(e);
        //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
        cgreg_unregister(regcon.mvar & tosave);
    }

    /* which registers can we use to save other registers in? */
    if (config.flags4 & CFG4space ||              // if optimize for space
        config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
        touse = 0;                              // PUSH/POP pairs are always shorter
    else
    {
        touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
        /* Don't use registers we'll have to save/restore               */
        touse &= ~(fregsaved & oldmfuncreg);
        /* Don't use registers that have constant values in them, since
           the code generated might have used the value.
         */
        touse &= ~oldregimmed;
    }

    CodeBuilder cdbs1; cdbs1.ctor();    // save code, goes before the code for e
    code *cs2 = null;                   // restore code, built in reverse order of the saves
    int adjesp = 0;                     // bytes of stack consumed by memory saves

    for (uint i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            if (touse)          /* if any scratch registers             */
            {
                uint j;
                for (j = 0; j < 8; j++)
                {
                    regm_t mj = mask(j);

                    if (touse & mj)
                    {
                        genmovreg(cdbs1,j,i);           // save:    j = i
                        cs2 = cat(genmovreg(i,j),cs2);  // restore: i = j (prepended)
                        touse &= ~mj;
                        mfuncreg &= ~mj;
                        regcon.used |= mj;
                        break;
                    }
                }
                assert(j < 8);
            }
            else                        // else use memory
            {
                CodeBuilder cdby; cdby.ctor();
                uint size = gensaverestore(mask(i), cdbs1, cdby);
                cs2 = cat(cdby.finish(),cs2);
                if (size)
                {
                    stackchanged = 1;
                    adjesp += size;
                }
            }
            getregs(cdbx,mi);
            tosave &= ~mi;
        }
    }
    CodeBuilder cdbs2; cdbs2.ctor();    // restore code, goes after the code for e
    if (adjesp)
    {
        // If this is done an odd number of times, it
        // will throw off the 8 byte stack alignment.
        // We should *only* worry about this if a function
        // was called in the code generation by codelem().
        int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);   // padding up to next STACKALIGN multiple
        if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
        {
            regm_t mval_save = regcon.immed.mval;
            regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
                                        // because c hasn't been executed yet
            cod3_stackadj(cdbs1, sz);
            regcon.immed.mval = mval_save;
            cdbs1.genadjesp(sz);

            cod3_stackadj(cdbs2, -sz);
            cdbs2.genadjesp(-sz);
        }
        cdbs2.append(cs2);


        cdbs1.genadjesp(adjesp);
        cdbs2.genadjesp(-adjesp);
    }
    else
        cdbs2.append(cs2);

    calledafunc |= calledafuncsave;
    msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
    mfuncreg &= oldmfuncreg;        /* update original                    */

    debug if (debugw)
        printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    // Final order: saves, the code for e, restores
    cdb.append(cdbs1);
    cdb.append(cdbx);
    cdb.append(cdbs2);
    return;
}
2981 
/*********************************************
 * Turn register mask into a string suitable for printing.
 *
 * A few well-known masks are returned as string literals. Any other mask is
 * formatted as register names separated by '|' into one of NUM statically
 * allocated buffers that are handed out in rotation, so several results can
 * be used as arguments of a single printf() call.
 *
 * Params:
 *      rm = register mask
 * Returns:
 *      0-terminated string; it is either a literal or one of the rotating
 *      static buffers, so the caller must neither free nor modify it, and
 *      it is overwritten after NUM further calls that need a buffer
 */

@trusted
const(char)* regm_str(regm_t rm)
{
    enum NUM = 10;      // number of buffers handed out in rotation
    enum SMAX = 128;    // maximum string length of one result
    __gshared char[SMAX + 1][NUM] str;
    __gshared int i;    // index of the next buffer to hand out

    if (rm == 0)
        return "0";
    if (rm == ALLREGS)
        return "ALLREGS";
    if (rm == BYTEREGS)
        return "BYTEREGS";
    if (rm == allregs)
        return "allregs";
    if (rm == XMMREGS)
        return "XMMREGS";
    char *p = str[i].ptr;
    if (++i == NUM)
        i = 0;
    *p = 0;
    for (size_t j = 0; j < 32; j++)
    {
        if (mask(cast(uint)j) & rm)
        {
            strcat(p,regstring[j]);
            rm &= ~mask(cast(uint)j);
            if (rm)                     // more registers follow
                strcat(p,"|");
        }
    }
    if (rm)
    {
        // Bits left over that have no name: append them in hex
        const pstrlen = strlen(p);
        char *s = p + pstrlen;
        snprintf(s, SMAX - pstrlen, "x%02x",rm);
    }
    assert(strlen(p) <= SMAX);

    // Return the rotating buffer itself. Duplicating it on the heap would
    // leak on every call: the result cannot be freed by callers, since the
    // other returns above are string literals.
    return p;
}
3027 
/*********************************
 * Walk down the right spine of a chain of comma-expressions, generating
 * code for each left operand along the way.
 * Params:
 *      cdb = receives the code generated for the left operands
 *      pe = in: root of the expression;
 *           out: first elem down the right side that is not an OPcomma
 */

@trusted
void docommas(ref CodeBuilder cdb, ref elem *pe)
{
    const uint pushedOnEntry = stackpush;
    const int cleanOnEntry = cgstate.stackclean;
    cgstate.stackclean = 0;

    elem* cur = pe;
    for (;;)
    {
        // Line number info is emitted for every elem visited,
        // including the final non-comma one
        if (configv.addlinenumbers && cur.Esrcpos.Slinnum)
            cdb.genlinnum(cur.Esrcpos);

        if (cur.Eoper != OPcomma)
            break;

        // The value of the left operand is discarded: request no result registers
        regm_t noResult = 0;
        codelem(cdb, cur.EV.E1, &noResult, true);

        // Step to the right operand, then release the comma node itself
        elem* commaNode = cur;
        cur = commaNode.EV.E2;
        freenode(commaNode);
    }
    pe = cur;

    assert(cgstate.stackclean == 0);
    cgstate.stackclean = cleanOnEntry;
    genstackclean(cdb, stackpush - pushedOnEntry, 0);
}
3063 
/**************************
 * Merge a saved register-contents state into the current `regcon`.
 *
 * For every register whose recorded cse value differs between `regcon`
 * and `pregconsave`, clear its bit in regcon.cse.mval; likewise for the
 * immed values and regcon.immed.mval. Afterwards the masks are intersected
 * with the saved ones, the `used` masks are united, and regcon.cse.mops is
 * trimmed to the surviving regcon.cse.mval.
 * Params:
 *      pregconsave = the saved state to merge with
 */

@trusted
void andregcon(ref con_t pregconsave)
{
    // All ones except the bit of the register under examination
    regm_t keepOthers = ~1;
    foreach (r; 0 .. REGMAX)
    {
        const bool cseDiffers   = pregconsave.cse.value[r]   != regcon.cse.value[r];
        const bool immedDiffers = pregconsave.immed.value[r] != regcon.immed.value[r];

        if (cseDiffers)
            regcon.cse.mval &= keepOthers;
        if (immedDiffers)
            regcon.immed.mval &= keepOthers;

        // Move the single zero bit up to the next register
        keepOthers = (keepOthers << 1) | 1;
    }
    //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval));

    regcon.cse.mval   &= pregconsave.cse.mval;
    regcon.immed.mval &= pregconsave.immed.mval;
    regcon.params     &= pregconsave.params;
    regcon.used       |= pregconsave.used;
    //printf("regcon.cse.mval&regcon.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops));

    // Must come last: depends on the final value of regcon.cse.mval
    regcon.cse.mops &= regcon.cse.mval;
}
3091 
3092 
/**********************************************
 * Print a disassembly of the instruction bytes to stdout, one instruction
 * per line, each prefixed with its offset in hex. The listing is headed by
 * the identifier of `funcsym_p`.
 * Params:
 *    code = array of instruction bytes
 */
@trusted
private extern (D)
void disassemble(ubyte[] code)
{
    // Sink handed to the disassembler: writes one character to stdout
    void put(char c) { printf("%c", c); }

    printf("%s:\n", funcsym_p.Sident.ptr);

    const model = I16 ? 16 : I32 ? 32 : 64;     // 16/32/64
    const bool is16 = model == 16;

    for (size_t offset = 0; offset < code.length; )
    {
        printf("%04x:", cast(int)offset);

        const start = cast(uint)offset;
        uint pc;
        const length = dmd.backend.disasm86.calccodsize(code, start, pc, model);

        dmd.backend.disasm86.getopstring(&put, code, start, length, model, is16, true,
                null, null, null, null);
        printf("\n");

        offset += length;       // advance to the next instruction
    }
}