1 /**
2  * Code generation 2
3  *
4  * Includes:
5  * - math operators (+ - * / %) and functions (abs, cos, sqrt)
6  * - 'string' functions (strlen, memcpy, memset)
7  * - pointers (address of / dereference)
8  * - struct assign, constructor, destructor
9  *
10  * Compiler implementation of the
11  * $(LINK2 https://www.dlang.org, D programming language).
12  *
13  * Copyright:   Copyright (C) 1984-1998 by Symantec
14  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
15  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
16  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
17  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
18  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
19  */
20 
21 module dmd.backend.cod2;
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code;
31 import dmd.backend.code_x86;
32 import dmd.backend.codebuilder;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.global;
36 import dmd.backend.oper;
37 import dmd.backend.ty;
38 import dmd.backend.type;
39 import dmd.backend.xmm;
40 
41 
42 nothrow:
43 @safe:
44 
45 import dmd.backend.cg : segfl, stackfl;
46 
    // Module-level int declared __gshared (one instance for the whole process,
    // not thread-local). It is not read or written anywhere in the part of the
    // file visible here.
    // NOTE(review): the name suggests it is state for the compare code
    // generator (cdcmp) -- confirm against its users before relying on that.
47 __gshared int cdcmp_flag;
48 
49 import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients;
50 
/*******************************
 * Exchange the values held in two register variables.
 * Params:
 *      a = location of the first register number
 *      b = location of the second register number
 */

private void swap(reg_t *a,reg_t *b)
{
    const reg_t saved = *b;
    *b = *a;
    *a = saved;
}
61 
62 
/*******************************************
 * Decide whether the effective address of `e` is restricted to MOV.
 *
 * The restriction only exists when generating position independent code
 * for 64 bit OSX, and only for a direct variable reference whose symbol has
 * storage class global, extern_, comdat or comdef.
 * Params:
 *      e = elem to examine
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
 */

@trusted
bool movOnly(const elem *e)
{
    const applies = (config.exe & EX_OSX64) &&
                    (config.flags3 & CFG3pic) &&
                    e.Eoper == OPvar;
    if (!applies)
        return false;

    // Fixups for these storage classes can only be done with a MOV
    switch (e.EV.Vsym.Sclass)
    {
        case SC.global:
        case SC.extern_:
        case SC.comdat:
        case SC.comdef:
            return true;

        default:
            return false;
    }
}
80 
/********************************
 * Determine index registers used by addressing mode.
 * Index is rm of modregrm field.
 * Params:
 *      c = instruction whose Irm, Isib and Irex fields are examined
 * Returns:
 *      mask of index registers; 0 when the mod bits of Irm are both set
 *      (the operand is a register rather than a memory reference)
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;
    const mod = modrm & 0xC0;
    if (mod == 0xC0)                    // register operand, nothing is indexed
        return 0;

    const low = modrm & 7;
    if (I16)
    {
        // 16 bit addressing: the register pair is fixed by the rm field
        static immutable ubyte[8] regs16 = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return regs16[low];
    }

    const rexB = (c.Irex & REX_B) ? 8 : 0;
    if (low != 4)                       // no sib byte: rm names the register
        return mask(low | rexB);

    // sib byte present: scaled index register, plus the base register
    // unless the encoding is base == 5 with mod == 0
    const sib = c.Isib;
    const base = sib & 7;
    const rexX = (c.Irex & REX_X) ? 8 : 0;
    regm_t result = mask(((sib >> 3) & 7) | rexX);
    if (!(base == 5 && mod == 0))
        result |= mask(base | rexB);
    return result;
}
119 
120 
/***************************
 * Gen code for call to floating point routine.
 *
 * When config.inline8087 is set the work is handed to orth87() instead.
 * Otherwise both operands are evaluated into the register sets chosen for
 * the operand type and the runtime library routine `clib` is called.
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = binary floating point elem
 *      pretregs = passed through to orth87() / callclib()
 *      clib = library routine for the double version of the operation;
 *             offset by (CLIB.fadd - CLIB.dadd) when the left operand is TYfloat
 */

@trusted
void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
{
    if (config.inline8087)
    {
        orth87(cdb,e,pretregs);
        return;
    }

    elem* left = e.EV.E1;
    regm_t leftRegs;
    regm_t rightRegs;

    if (tybasic(left.Ety) == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    // switch to the float routine
        leftRegs  = FLOATREGS;
        rightRegs = FLOATREGS2;
    }
    else if (I32)
    {
        leftRegs  = DOUBLEREGS_32;
        rightRegs = DOUBLEREGS2_32;
    }
    else
    {
        leftRegs  = mSTACK;
        rightRegs = DOUBLEREGS_16;
    }

    codelem(cdb, left, &leftRegs, false);

    // stackclean is bumped for the duration of the right operand's
    // evaluation when the left operand was placed on the stack
    const bool leftOnStack = (leftRegs & mSTACK) != 0;
    if (leftOnStack)
        cgstate.stackclean++;
    scodelem(cdb, e.EV.E2, &rightRegs, leftRegs & ~mSTACK, false);
    if (leftOnStack)
        cgstate.stackclean--;

    callclib(cdb, e, clib, pretregs, 0);
}
161 
162 /*****************************
163  * Handle operators which are more or less orthogonal
164  * ( + - & | ^ )
     *
     * Generates code for OPadd, OPmin, OPor, OPxor and OPand; any other
     * operator reaching the opcode switch hits assert(0). Floating point and
     * XMM operand types are handed off to orthxmm(), orth87() or opdouble().
     *
     * Params:
     *      cdb = code builder the generated instructions are appended to
     *      e = binary operator elem; its operands are e.EV.E1 and e.EV.E2
     *      pretregs = mask of registers wanted for the result. 0 means the
     *                 result is not wanted (operands are still evaluated),
     *                 mPSW alone means only the flags are wanted.
165  */
166 
167 @trusted
168 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
169 {
170     //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
171     elem *e1 = e.EV.E1;
172     elem *e2 = e.EV.E2;
173     if (*pretregs == 0)                   // if don't want result
174     {
175         codelem(cdb,e1,pretregs,false); // eval left leaf
176         *pretregs = 0;                          // in case they got set
177         codelem(cdb,e2,pretregs,false);
178         return;
179     }
180 
181     const ty = tybasic(e.Ety);
182     const ty1 = tybasic(e1.Ety);
183 
184     if (tyfloating(ty1))
185     {
186         if (tyvector(ty1) ||
187             config.fpxmmregs && tyxmmreg(ty1) &&
188             !(*pretregs & mST0) &&
189             !(*pretregs & mST01) &&
190             !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
191            )
192         {
193             orthxmm(cdb,e,pretregs);
194             return;
195         }
196         if (config.inline8087)
197         {
198             orth87(cdb,e,pretregs);
199             return;
200         }
201         if (config.exe & EX_windos)
202         {
203             opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
204                                                        : CLIB.dsub);
205             return;
206         }
207         else
208         {
209             assert(0);
210         }
211     }
        // Only non-floating ty1 gets here: every floating path above either
        // returns or asserts.
212     if (tyxmmreg(ty1))
213     {
214         orthxmm(cdb,e,pretregs);
215         return;
216     }
217 
218     opcode_t op1, op2;
219     uint mode;
        // Incremented around the getlvalue(e) call in the LEA path below when
        // e.Ecount is nonzero; the LEA path is skipped while it is nonzero.
220     __gshared int nest;
221 
222     const ty2 = tybasic(e2.Ety);
223     const e2oper = e2.Eoper;
224     const sz = _tysize[ty];
225     const isbyte = (sz == 1);
        // CFopsize for a SHORTSIZE result outside I16 mode, otherwise 0
226     code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
227     bool test = false;                // assume we destroyed lvalue
228 
        // op1 is the opcode for the low (or only) word, op2 the opcode for the
        // high word of a two word operation, mode the reg field used with the
        // 0x81 immediate form.
229     switch (e.Eoper)
230     {
231         case OPadd:     mode = 0;
232                         op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
233         case OPmin:     mode = 5;
234                         op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
235         case OPor:      mode = 1;
236                         op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
237         case OPxor:     mode = 6;
238                         op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
239         case OPand:     mode = 4;
240                         op1 = 0x23; op2 = 0x23;         /* AND, AND     */
241                         if (tyreg(ty1) &&
242                             *pretregs == mPSW)          /* if flags only */
243                         {
244                             test = true;
245                             op1 = 0x85;                 /* TEST         */
246                             mode = 0;
247                         }
248                         break;
249 
250         default:
251             assert(0);
252     }
253     op1 ^= isbyte;                                  /* if byte operation    */
254 
255     // Compute numwords, the number of words to operate on.
256     int numwords = 1;
257     if (!I16)
258     {
259         /* Cannot operate on longs and then do a 'paint' to a far       */
260         /* pointer, because far pointers are 48 bits and longs are 32.  */
261         /* Therefore, numwords can never be 2.                          */
262         assert(!(tyfv(ty1) && tyfv(ty2)));
263         if (sz == 2 * REGSIZE)
264         {
265             numwords++;
266         }
267     }
268     else
269     {
270         /* If ty is a TYfptr, but both operands are long, treat the     */
271         /* operation as a long.                                         */
272         if ((tylong(ty1) || ty1 == TYhptr) &&
273             (tylong(ty2) || ty2 == TYhptr))
274             numwords++;
275     }
276 
277     // Special cases where only flags are set
278     if (test && _tysize[ty1] <= REGSIZE &&
279         (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
280         && !movOnly(e1)
281        )
282     {
283         // Handle the case of (var & const)
284         if (e2.Eoper == OPconst && el_signx32(e2))
285         {
286             code cs = void;
287             cs.Iflags = 0;
288             cs.Irex = 0;
289             getlvalue(cdb,&cs,e1,0);
290             targ_size_t value = e2.EV.Vpointer;
291             if (sz == 2)
292                 value &= 0xFFFF;
293             else if (sz == 4)
294                 value &= 0xFFFFFFFF;
295             reg_t reg;
296             if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,reg))
297             {
298                 code_newreg(&cs, reg);
299                 if (I64 && isbyte && reg >= 4)
300                     cs.Irex |= REX;
301             }
302             else
303             {
304                 if (sz == 8 && !I64)
305                 {
306                     assert(value == cast(int)value);    // sign extend imm32
307                 }
308                 op1 = 0xF7;
309                 cs.IEV2.Vint = cast(targ_int)value;
310                 cs.IFL2 = FLconst;
311             }
                // NOTE(review): on the immediate path op1 was just reset to 0xF7,
                // so this gives 0xF6 for byte operands. On the reghasvalue path
                // op1 already had isbyte folded in after the opcode switch, so
                // this second XOR gives 0x85 rather than 0x84 for byte operands
                // -- confirm that is intended.
312             cs.Iop = op1 ^ isbyte;
313             cs.Iflags |= word | CFpsw;
314             freenode(e1);
315             freenode(e2);
316             cdb.gen(&cs);
317             return;
318         }
319 
320         // Handle (exp & reg)
321         reg_t reg;
322         regm_t retregs;
323         if (isregvar(e2,retregs,reg))
324         {
325             code cs = void;
326             cs.Iflags = 0;
327             cs.Irex = 0;
328             getlvalue(cdb,&cs,e1,0);
329             code_newreg(&cs, reg);
330             if (I64 && isbyte && reg >= 4)
331                 cs.Irex |= REX;
                // NOTE(review): same double application of isbyte as in the
                // (var & const) register path above.
332             cs.Iop = op1 ^ isbyte;
333             cs.Iflags |= word | CFpsw;
334             freenode(e1);
335             freenode(e2);
336             cdb.gen(&cs);
337             return;
338         }
339     }
340 
341     code cs = void;
342     cs.Iflags = 0;
343     cs.Irex = 0;
344 
345     // Look for possible uses of LEA
346     if (e.Eoper == OPadd &&
347         !(*pretregs & mPSW) &&                // flags aren't set by LEA
348         !nest &&                              // could cause infinite recursion if e.Ecount
349         (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
350     {
351         const rex = (sz == 8) ? REX_W : 0;
352 
353         // Handle the case of (e + &var)
354         int e1oper = e1.Eoper;
355         if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
356                 || // LEA costs too much for simple EAs on older CPUs
357             (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
358             (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
359             (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
360             (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
361             (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
362              (e1oper == OPmul || e1oper == OPshl) &&
363              e1.EV.E2.Eoper == OPconst &&
364              ssindex(e1oper,e1.EV.E2.EV.Vuns)
365             ) ||
366             (!I16 && e1.Ecount)
367            )
368         {
                // Let getlvalue() turn the whole add into an addressing mode,
                // then load that address with LEA.
369             const inc = e.Ecount != 0;
370             nest += inc;
371             code csx = void;
372             getlvalue(cdb,&csx,e,0);
373             nest -= inc;
374             reg_t regx;
375             allocreg(cdb,pretregs,&regx,ty);
376             csx.Iop = LEA;
377             code_newreg(&csx, regx);
378             cdb.gen(&csx);          // LEA regx,EA
379             if (rex)
380                 code_orrex(cdb.last(), rex);
381             return;
382         }
383 
384         // Handle the case of ((e + c) + e2)
385         if (!I16 &&
386             e1oper == OPadd &&
387             (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
388              e2oper == OPconst && el_signx32(e2)) &&
389             !e1.Ecount
390            )
391         {
                // edisp is the constant that becomes the displacement, ebase the
                // other operand that is evaluated into the base register
392             elem *ebase;
393             elem *edisp;
394             if (e2oper == OPconst && el_signx32(e2))
395             {   edisp = e2;
396                 ebase = e1.EV.E2;
397             }
398             else
399             {   edisp = e1.EV.E2;
400                 ebase = e2;
401             }
402 
403             auto e11 = e1.EV.E1;
404             regm_t retregs = *pretregs & ALLREGS;
405             if (!retregs)
406                 retregs = ALLREGS;
                // ss is the scale field of the sib byte; a nonzero ss2 means an
                // extra multiply step (LEA or IMUL) is emitted first and ss2 is
                // then used as the scale of the final LEA
407             int ss = 0;
408             int ss2 = 0;
409 
410             // Handle the case of (((e *  c1) + c2) + e2)
411             // Handle the case of (((e << c1) + c2) + e2)
412             if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
413                 e11.EV.E2.Eoper == OPconst &&
414                 !e11.Ecount
415                )
416             {
417                 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
418                 if (e11.Eoper == OPshl)
419                 {
420                     if (co1 > 3)
421                         goto L13;
422                     ss = cast(int)co1;
423                 }
424                 else
425                 {
426                     ss2 = 1;
427                     switch (co1)
428                     {
429                         case  6:        ss = 1;                 break;
430                         case 12:        ss = 1; ss2 = 2;        break;
431                         case 24:        ss = 1; ss2 = 3;        break;
432                         case 10:        ss = 2;                 break;
433                         case 20:        ss = 2; ss2 = 2;        break;
434                         case 40:        ss = 2; ss2 = 3;        break;
435                         case 18:        ss = 3;                 break;
436                         case 36:        ss = 3; ss2 = 2;        break;
437                         case 72:        ss = 3; ss2 = 3;        break;
438                         default:
439                             ss2 = 0;
440                             goto L13;
441                     }
442                 }
443                 freenode(e11.EV.E2);
444                 freenode(e11);
445                 e11 = e11.EV.E1;
446               L13:
447                 { }
448             }
449 
450             reg_t reg11;
451             regm_t regm;
452             if (e11.Eoper == OPvar && isregvar(e11,regm,reg11))
453             {
454                 if (tysize(e11.Ety) <= REGSIZE)
455                     retregs = mask(reg11); // only want the LSW
456                 else
457                     retregs = regm;
458                 freenode(e11);
459             }
460             else
461                 codelem(cdb,e11,&retregs,false);
462 
463             regm_t rretregs = ALLREGS & ~retregs & ~mBP;
464             scodelem(cdb,ebase,&rretregs,retregs,true);
465             reg_t reg;
466             {
467                 regm_t sregs = *pretregs & ~rretregs;
468                 if (!sregs)
469                     sregs = ALLREGS & ~rretregs;
470                 allocreg(cdb,&sregs,&reg,ty);
471             }
472 
473             assert((retregs & (retregs - 1)) == 0); // must be only one register
474             assert((rretregs & (rretregs - 1)) == 0); // must be only one register
475 
476             auto  reg1 = findreg(retregs);
477             const reg2 = findreg(rretregs);
478 
479             if (ss2)
480             {
481                 assert(reg != reg2);
482                 if ((reg1 & 7) == BP)
483                 {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
484 
485                     // IMUL reg,imm32
486                     cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
487                 }
488                 else
489                 {   // LEA reg,[reg1*ss][reg1]
490                     cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
491                     if (reg1 & 8)
492                         code_orrex(cdb.last(), REX_X | REX_B);
493                 }
494                 if (rex)
495                     code_orrex(cdb.last(), rex);
496                 reg1 = reg;
497                 ss = ss2;                               // use *2 for scale
498             }
499 
500             cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
501             cs.Irm = modregrm(2,reg & 7,4);
502             cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
503             assert(reg2 != BP);
504             cs.Iflags = CFoff;
505             cs.Irex = cast(ubyte)rex;
506             if (reg & 8)
507                 cs.Irex |= REX_R;
508             if (reg1 & 8)
509                 cs.Irex |= REX_X;
510             if (reg2 & 8)
511                 cs.Irex |= REX_B;
512             cs.IFL1 = FLconst;
513             cs.IEV1.Vsize_t = edisp.EV.Vuns;
514 
515             freenode(edisp);
516             freenode(e1);
517             cdb.gen(&cs);
518             fixresult(cdb,e,mask(reg),pretregs);
519             return;
520         }
521     }
522 
523     regm_t posregs = (isbyte) ? BYTEREGS : (mES | allregs);
524     regm_t retregs = *pretregs & posregs;
525     if (retregs == 0)                   /* if no return regs speced     */
526                                         /* (like if wanted flags only)  */
527         retregs = ALLREGS & posregs;    // give us some
528 
529     if (ty1 == TYhptr || ty2 == TYhptr)
530     {     /* Generate code for add/subtract of huge pointers.
531            No attempt is made to generate very good code.
532          */
            // In both branches retregs (with DX as its upper register) is the
            // operand that gets negated for OPmin and receives the result;
            // rretregs holds the other operand, whose upper half is in CX.
533         retregs = (retregs & mLSW) | mDX;
534         regm_t rretregs;
535         if (ty1 == TYhptr)
536         {   // hptr +- long
537             rretregs = mLSW & ~(retregs | regcon.mvar);
538             if (!rretregs)
539                 rretregs = mLSW;
540             rretregs |= mCX;
541             codelem(cdb,e1,&rretregs,0);
542             retregs &= ~rretregs;
543             if (!(retregs & mLSW))
544                 retregs |= mLSW & ~rretregs;
545 
546             scodelem(cdb,e2,&retregs,rretregs,true);
547         }
548         else
549         {   // long + hptr
550             codelem(cdb,e1,&retregs,0);
551             rretregs = (mLSW | mCX) & ~retregs;
552             if (!(rretregs & mLSW))
553                 rretregs |= mLSW;
554             scodelem(cdb,e2,&rretregs,retregs,true);
555         }
556         getregs(cdb,rretregs | retregs);
557         const mreg = DX;
558         const lreg = findreglsw(retregs);
559         if (e.Eoper == OPmin)
560         {   // negate retregs
561             cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
562             cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
563             code_orflag(cdb.last(),CFpsw);
564             cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
565         }
566         const lrreg = findreglsw(rretregs);
567         genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
568         code_orflag(cdb.last(),CFpsw);
569         genmovreg(cdb,lrreg,CX);      // MOV lrreg,CX
570         cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
571         genshift(cdb);                             // MOV CX,offset __AHSHIFT
572         cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
573         genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
574         fixresult(cdb,e,retregs,pretregs);
575         return;
576     }
577 
        // General case: evaluate the left operand into retregs; reg is the
        // register the operation is applied to.
578     regm_t rretregs;
579     reg_t reg;
580     if (_tysize[ty1] > REGSIZE && numwords == 1)
581     {     /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */
582 
583         debug
584         if (_tysize[ty2] != REGSIZE)
585         {
586             printf("e = %p, e.Eoper = %s e1.Ety = %s e2.Ety = %s\n", e, oper_str(e.Eoper), tym_str(ty1), tym_str(ty2));
587             elem_print(e);
588         }
589 
590         assert(_tysize[ty2] == REGSIZE);
591 
592         /* Watch out for the case here where you are going to OP reg,EA */
593         /* and both the reg and EA use ES! Prevent this by forcing      */
594         /* reg into the regular registers.                              */
595         if ((e2oper == OPind ||
596             (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
597             !e2.Ecount)
598         {
599             retregs = ALLREGS;
600         }
601 
602         codelem(cdb,e1,&retregs,test != 0);
603         reg = findreglsw(retregs);      /* reg is the register with the offset*/
604     }
605     else
606     {
607         regm_t regm;
608         reg_t regx;
609 
610         /* if (tyword + TYfptr) */
611         if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
612         {   retregs = ~*pretregs & ALLREGS;
613 
614             /* if retregs doesn't have any regs in it that aren't reg vars */
615             if ((retregs & ~regcon.mvar) == 0)
616                 retregs |= mAX;
617         }
618         else if (numwords == 2 && retregs & mES)
619             retregs = (retregs | mMSW) & ALLREGS;
620 
621         // Determine if we should swap operands, because
622         //      mov     EAX,x
623         //      add     EAX,reg
624         // is faster than:
625         //      mov     EAX,reg
626         //      add     EAX,x
627         else if (e2oper == OPvar &&
628                  e1.Eoper == OPvar &&
629                  e.Eoper != OPmin &&
630                  isregvar(e1,regm,regx) &&
631                  regm != retregs &&
632                  _tysize[ty1] == _tysize[ty2])
633         {
                // Note that e2oper and ty2 were captured before this swap and
                // are not updated by it.
634             elem *es = e1;
635             e1 = e2;
636             e2 = es;
637         }
638         codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
639         reg = findreg(retregs);
640     }
        // Combine with the right operand; the strategy depends on its operator.
        // rval is nonzero when a register (rreg) already holds the constant,
        // i is the constant value.
641     reg_t rreg;
642     int rval;
643     targ_size_t i;
644     switch (e2oper)
645     {
646         case OPind:                                 /* if addressing mode   */
647             if (!e2.Ecount)                         /* if not CSE           */
648                     goto L1;                        /* try OP reg,EA        */
649             goto default;
650 
651         default:                                    /* operator node        */
            // L2: generic fallback, also jumped to from the other cases:
            // evaluate e2 into registers and do a register,register operation
652         L2:
653             rretregs = ALLREGS & ~retregs;
654             /* Be careful not to do arithmetic on ES        */
655             if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
656                 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
657             else if (isbyte)
658                 rretregs &= BYTEREGS;
659 
660             scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
661             rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
662             if (!test)
663                 getregs(cdb,retregs);          // we will trash these regs
664             if (numwords == 1)                              /* ADD reg,rreg */
665             {
666                 /* reverse operands to avoid moving around the segment value */
667                 if (_tysize[ty2] > REGSIZE)
668                 {
669                     getregs(cdb,rretregs);
670                     genregs(cdb,op1,rreg,reg);
671                     retregs = rretregs;     // reverse operands
672                 }
673                 else
674                 {
675                     genregs(cdb,op1,reg,rreg);
676                     if (!I16 && *pretregs & mPSW)
677                         cdb.last().Iflags |= word;
678                 }
679                 if (I64 && sz == 8)
680                     code_orrex(cdb.last(), REX_W);
681                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
682                     code_orrex(cdb.last(), REX);
683             }
684             else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
685             {
686                 reg = findreglsw(retregs);
687                 rreg = findreglsw(rretregs);
688                 genregs(cdb,op1,reg,rreg);
689                 if (e.Eoper == OPadd || e.Eoper == OPmin)
690                     code_orflag(cdb.last(),CFpsw);
691                 reg = findregmsw(retregs);
692                 rreg = findregmsw(rretregs);
693                 if (!(e2oper == OPu16_32 && // if second operand is 0
694                       (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
695                    )
696                     genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
697             }
698             break;
699 
700         case OPrelconst:
701             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
702                 goto default;
703             if (sz != REGSIZE)
704                 goto L2;
705             if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
706                 goto L2;
707             if (evalinregister(e2))
708                 goto L2;
709             cs.IEV2.Voffset = e2.EV.Voffset;
710             cs.IEV2.Vsym = e2.EV.Vsym;
711             cs.Iflags |= CFoff;
712             i = 0;                          /* no INC or DEC opcode         */
713             rval = 0;
714             goto L3;
715 
716         case OPconst:
717             if (tyfv(ty2))
718                 goto L2;
719             if (numwords == 1)
720             {
721                 if (!el_signx32(e2))
722                     goto L2;
723                 i = e2.EV.Vpointer;
724                 if (word)
725                 {
726                     if (!(*pretregs & mPSW) &&
727                         config.flags4 & CFG4speed &&
728                         (e.Eoper == OPor || e.Eoper == OPxor || test ||
729                          (e1.Eoper != OPvar && e1.Eoper != OPind)))
730                     {   word = 0;
731                         i &= 0xFFFF;
732                     }
733                 }
734                 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,rreg);
735                 cs.IEV2.Vsize_t = i;
                // L3: shared by OPrelconst and single word OPconst. Emits
                // OP reg,rreg when a register already holds the constant
                // (rval), otherwise the 0x81 immediate form with `mode`.
736             L3:
737                 if (!test)
738                     getregs(cdb,retregs);          // we will trash these regs
                    // undo the byte adjustment made after the opcode switch;
                    // it is applied once to cs.Iop just before cdb.gen() below
739                 op1 ^= isbyte;
740                 cs.Iflags |= word;
741                 if (rval)
742                 {   cs.Iop = op1 ^ 2;
743                     mode = rreg;
744                 }
745                 else
746                     cs.Iop = 0x81;
747                 cs.Irm = modregrm(3,mode&7,reg&7);
748                 if (mode & 8)
749                     cs.Irex |= REX_R;
750                 if (reg & 8)
751                     cs.Irex |= REX_B;
752                 if (I64 && sz == 8)
753                     cs.Irex |= REX_W;
754                 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
755                     cs.Irex |= REX;
756                 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
757                 /* Modify instruction for special cases */
758                 switch (e.Eoper)
759                 {
760                     case OPadd:
761                     {
762                         int iop;
763 
764                         if (i == 1)
765                             iop = 0;                    /* INC reg      */
766                         else if (i == -1)
767                             iop = 8;                    /* DEC reg      */
768                         else
769                             break;
770                         cs.Iop = (0x40 | iop | reg) ^ isbyte;
                            // the one byte 0x40+reg form is replaced by the 0xFF
                            // group form for byte operations that need flags,
                            // and always in 64 bit mode
771                         if ((isbyte && *pretregs & mPSW) || I64)
772                         {
773                             cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
774                             cs.Iop = 0xFF;
775                         }
776                         break;
777                     }
778 
779                     case OPand:
780                         if (test)
781                             cs.Iop = rval ? op1 : 0xF7; // TEST
782                         break;
783 
784                     default:
785                         break;
786                 }
787                 if (*pretregs & mPSW)
788                     cs.Iflags |= CFpsw;
789                 cs.Iop ^= isbyte;
790                 cdb.gen(&cs);
791                 cs.Iflags &= ~CFpsw;
792             }
793             else if (numwords == 2)
794             {
                    // Two immediate instructions: first on the low register with
                    // the low half of the constant, then on the high register
                    // with the high half (msw).
795                 getregs(cdb,retregs);
796                 reg = findregmsw(retregs);
797                 const lsreg = findreglsw(retregs);
798                 cs.Iop = 0x81;
799                 cs.Irm = modregrm(3,mode,lsreg);
800                 cs.IFL2 = FLconst;
801                 const msw = cast(targ_int)MSREG(e2.EV.Vllong);
802                 cs.IEV2.Vint = e2.EV.Vlong;
803                 switch (e.Eoper)
804                 {
805                     case OPadd:
806                     case OPmin:
807                         cs.Iflags |= CFpsw;
808                         break;
809 
810                     default:
811                         break;
812                 }
813                 cdb.gen(&cs);
814                 cs.Iflags &= ~CFpsw;
815 
816                 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);
817                 cs.IEV2.Vint = msw;
818                 if (e.Eoper == OPadd)
819                     cs.Irm |= modregrm(0,2,0);      /* ADC          */
820                 cdb.gen(&cs);
821             }
822             else
823                 assert(0);
824             freenode(e2);
825             break;
826 
827         case OPvar:
828             if (movOnly(e2))
829                 goto L2;
            // L1: OP reg,EA -- operate directly on e2's memory operand via
            // loadea(); also reached from OPind when e2 is not a CSE
830         L1:
831             if (tyfv(ty2))
832                 goto L2;
833             if (!test)
834                 getregs(cdb,retregs);          // we will trash these regs
835             loadea(cdb,e2,&cs,op1,
836                    ((numwords == 2) ? findreglsw(retregs) : reg),
837                    0,retregs,retregs);
838             if (!I16 && word)
839             {   if (*pretregs & mPSW)
840                     code_orflag(cdb.last(),word);
841                 else
842                     cdb.last().Iflags &= ~cast(int)word;
843             }
844             else if (numwords == 2)
845             {
846                 if (e.Eoper == OPadd || e.Eoper == OPmin)
847                     code_orflag(cdb.last(),CFpsw);
848                 reg = findregmsw(retregs);
849                 if (!OTleaf(e2.Eoper))
850                 {   getlvalue_msw(&cs);
851                     cs.Iop = op2;
852                     NEWREG(cs.Irm,reg);
853                     cdb.gen(&cs);                 // ADC reg,data+2
854                 }
855                 else
856                     loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
857             }
858             else if (I64 && sz == 8)
859                 code_orrex(cdb.last(), REX_W);
860             freenode(e2);
861             break;
862     }
863 
        // For results that fit a register, mark the last instruction as the one
        // setting the flags and drop mPSW from the request.
864     if (sz <= REGSIZE && *pretregs & mPSW)
865     {
866         /* If the expression is (_tls_array + ...), then the flags are not set
867          * since the linker may rewrite these instructions into something else.
868          */
869         if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
870         {
871             const s = e1.EV.Vsym;
                // comparing 10 bytes includes the terminating 0 of "tls_array",
                // so the identifier must be exactly "_tls_array"
872             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
873             {
874                 goto L7;                        // don't assume flags are set
875             }
876         }
877         code_orflag(cdb.last(),CFpsw);
878         *pretregs &= ~mPSW;                    // flags already set
879     L7: { }
880     }
881     fixresult(cdb,e,retregs,pretregs);
882 }
883 
884 
885 /*****************************
886  * Handle multiply.
     *
     * Generates code for the multiply elem `e`. When no result is wanted
     * (*pretregs == 0) the two operands are still evaluated and nothing
     * else is emitted. Floating point and XMM-typed operands are forwarded
     * to orthxmm(), orth87() or opdouble(); integer operands are dispatched
     * on the operator of the right operand (e.EV.E2).
     *
     * Params:
     *      cdb = code builder that receives the generated instructions
     *      e = multiply elem; e.EV.E1 and e.EV.E2 are its operands
     *      pretregs = registers the result is wanted in (0: result not wanted);
     *                 handed on to fixresult() or to the helper generators
887  */
888 
889 @trusted
890 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
891 {
892     //printf("cdmul()\n");
893     elem *e1 = e.EV.E1;
894     elem *e2 = e.EV.E2;
895     if (*pretregs == 0)                         // if don't want result
896     {
897         codelem(cdb,e1,pretregs,false);      // eval left leaf
898         *pretregs = 0;                          // in case they got set
899         codelem(cdb,e2,pretregs,false);      // eval right leaf
900         return;
901     }
902 
903     //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
904     const tyml = tybasic(e1.Ety);
905     const ty = tybasic(e.Ety);
906     const oper = e.Eoper;
907 
908     if (tyfloating(tyml))
909     {
910         if (tyvector(tyml) ||
911             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
912             !(*pretregs & mST0) &&
913             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
914             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
915             !(ty == TYllong)  //   or passing to function through integer register
916            )
917         {
918             orthxmm(cdb,e,pretregs);
919             return;
920         }
921         if (config.exe & EX_posix)
922             orth87(cdb,e,pretregs);
923         else
924             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
925 
926         return;
927     }
928 
929     if (tyxmmreg(tyml))
930     {
931         orthxmm(cdb,e,pretregs);
932         return;
933     }
934 
935     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if unsigned operation, 0 if signed
936     const isbyte = tybyte(e.Ety) != 0;
937     const sz = _tysize[tyml];
938     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;   // REX_W only for 8 byte operands on I64
939     const uint grex = rex << 16;                      // rex positioned for OR-ing into modregrm values
940     const OPER opunslng = I16 ? OPu16_32 : OPu32_64;  // the unsigned widening operator for this target
941 
942     code cs = void;
943     cs.Iflags = 0;
944     cs.Irex = 0;
945 
946     switch (e2.Eoper)
947     {
948         case OPu16_32:
949         case OPs16_32:
950         case OPu32_64:
951         case OPs32_64:
952         {
              // Both operands are the same widening conversion of a REGSIZE value:
              // a single widening MUL/IMUL leaves the result in DX:AX
953             if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper ||
954                 e1.Ecount || e2.Ecount)
955                 goto default;
956             const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;   // 0xF7 /4 (MUL) if unsigned widening, else /5 (IMUL)
957             regm_t retregsx = mAX;
958             codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
959             if (e2.EV.E1.Eoper == OPvar ||
960                 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
961                )
962             {
963                 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);
964             }
965             else
966             {
967                 regm_t rretregsx = ALLREGS & ~mAX;
968                 scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue
969                 getregs(cdb,mAX | mDX);
970                 const rregx = findreg(rretregsx);
971                 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
972             }
973             freenode(e.EV.E1);
974             freenode(e2);
975             fixresult(cdb,e,mAX | mDX,pretregs);
976             return;
977         }
978 
979         case OPconst:
980             const e2factor = cast(targ_size_t)el_tolong(e2);
981 
982             // Multiply by a constant
983             if (I32 && sz == REGSIZE * 2)
984             {
985                 /*  if (msw)
986                       IMUL    EDX,EDX,lsw
987                       IMUL    reg,EAX,msw
988                       ADD     reg,EDX
989                     else
990                       IMUL    reg,EDX,lsw
991                     MOV       EDX,lsw
992                     MUL       EDX
993                     ADD       EDX,reg
994                  */
995                 regm_t retregs = mAX | mDX;
996                 codelem(cdb,e1,&retregs,false);    // eval left leaf
997                 reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX));
998                 getregs(cdb,mDX | mAX);
999 
1000                 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));  // low REGSIZE*8 bits of the factor
1001                 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));               // remaining high bits of the factor
1002 
1003                 if (msw)
1004                 {
1005                     genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
1006                     genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
1007                     cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
1008                 }
1009                 else
1010                     genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw
1011 
1012                 movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
1013                 getregs(cdb,mDX);
1014                 cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
1015                 cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg
1016 
1017                 const resregx = mDX | mAX;
1018                 freenode(e2);
1019                 fixresult(cdb,e,resregx,pretregs);
1020                 return;
1021             }
1022 
1023 
1024             const int pow2 = ispow2(e2factor);          // NOTE(review): pow2 is not referenced again in cdmul
1025 
1026             if (sz > REGSIZE || !el_signx32(e2))        // presumably: constant is not usable as a 32 bit immediate - TODO confirm
1027                 goto default;
1028 
1029             if (config.target_cpu >= TARGET_80286)
1030             {
1031                 if (I32 || I64)
1032                 {
1033                     // See if we can use an LEA instruction
1034                     int ss;             // scale field of the first LEA (scale = 1 << ss)
1035                     int ss2 = 0;        // scale field of the optional second LEA, 0 if none
1036                     int shift;          // SHL count used by the L5 sequence
1037 
1038                     switch (e2factor)
1039                     {
1040                         case 12:    ss = 1; ss2 = 2; goto L4;   // 12 = 3 * 4
1041                         case 24:    ss = 1; ss2 = 3; goto L4;   // 24 = 3 * 8
1042 
1043                         case 6:
1044                         case 3:     ss = 1; goto L4;            // 3, and 6 = 3 * 2 via the ADD below
1045 
1046                         case 20:    ss = 2; ss2 = 2; goto L4;   // 20 = 5 * 4
1047                         case 40:    ss = 2; ss2 = 3; goto L4;   // 40 = 5 * 8
1048 
1049                         case 10:
1050                         case 5:     ss = 2; goto L4;            // 5, and 10 = 5 * 2 via the ADD below
1051 
1052                         case 36:    ss = 3; ss2 = 2; goto L4;   // 36 = 9 * 4
1053                         case 72:    ss = 3; ss2 = 3; goto L4;   // 72 = 9 * 8
1054 
1055                         case 18:
1056                         case 9:     ss = 3; goto L4;            // 9, and 18 = 9 * 2 via the ADD below
1057 
1058                         L4:     // reg *= (1 << ss) + 1, then either reg *= (1 << ss2) or, for even factors, reg += reg
1059                         {
1060                             regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13);
1061                             if (!resreg)
1062                                 resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13);
1063 
1064                             codelem(cdb,e.EV.E1,&resreg,false);
1065                             getregs(cdb,resreg);
1066                             reg_t reg = findreg(resreg);
1067 
1068                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1069                                         modregxrmx(ss,reg,reg));        // LEA reg,[ss*reg][reg]
1070                             assert((reg & 7) != BP);
1071                             if (ss2)
1072                             {
1073                                 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1074                                                modregxrm(ss2,reg,5));
1075                                 cdb.last().IFL1 = FLconst;
1076                                 cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
1077                             }
1078                             else if (!(e2factor & 1))                   // if even factor
1079                             {
1080                                 genregs(cdb,0x03,reg,reg);              // ADD reg,reg
1081                                 code_orrex(cdb.last(),rex);
1082                             }
1083                             freenode(e2);
1084                             fixresult(cdb,e,resreg,pretregs);
1085                             return;
1086                         }
1087                         case 37:
1088                         case 74:    shift = 2;                  // 37 = 8 * 4 + 5, 74 = 37 * 2
1089                                     goto L5;
1090                         case 13:
1091                         case 26:    shift = 0;                  // 13 = 8 + 5, 26 = 13 * 2
1092                                     goto L5;
1093                         L5:     // reg = 5 * sreg; sreg <<= shift; reg += 8 * sreg; doubled for even factors
1094                         {
1095                             regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
1096                             regm_t resreg = *pretregs & (ALLREGS | mBP);
1097                             if (!resreg)
1098                                 resreg = retregs;
1099 
1100                             // Don't use EBP
1101                             resreg &= ~(mBP | mR13);
1102                             if (!resreg)
1103                                 resreg = retregs;
1104                             reg_t reg;
1105                             allocreg(cdb,&resreg,&reg,TYint);
1106 
1107                             regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
1108                             codelem(cdb,e.EV.E1,&sregm,false);
1109                             uint sreg = findreg(sregm);
1110                             getregs(cdb,resreg | sregm);
1111                             assert((sreg & 7) != BP);
1112                             assert((reg & 7) != BP);
1113                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1114                                                   modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
1115                             if (shift)
1116                                 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
1117                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1118                                                   modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
1119                             if (!(e2factor & 1))                                  // if even factor
1120                             {
1121                                 genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
1122                                 code_orrex(cdb.last(),rex);
1123                             }
1124                             freenode(e2);
1125                             fixresult(cdb,e,resreg,pretregs);
1126                             return;
1127                         }
1128 
1129                         default:
1130                             break;
1131                     }
1132                 }
1133 
                  // No LEA sequence applies: use the three operand IMUL with an immediate
1134                 regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
1135                 regm_t resreg = *pretregs & (ALLREGS | mBP);
1136                 if (!resreg)
1137                     resreg = retregs;
1138 
1139                 scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
1140                 const regx = findreg(retregs);
1141                 reg_t rreg;
1142                 allocreg(cdb,&resreg,&rreg,e.Ety);
1143 
1144                 // IMUL rreg,regx,e2factor
1145                 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
1146                 freenode(e2);
1147                 fixresult(cdb,e,resreg,pretregs);
1148                 return;
1149             }
1150             goto default;
1151 
1152         case OPind:
1153             if (!e2.Ecount)                        // if not CSE
1154                     goto case OPvar;                        // try OP reg,EA
1155             goto default;
1156 
1157         default:                                    // OPconst and operators
1158             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1159             if (sz <= REGSIZE)
1160             {
1161                 regm_t retregs = mAX;
1162                 codelem(cdb,e1,&retregs,false);           // eval left leaf
1163                 regm_t rretregs = isbyte ? BYTEREGS & ~mAX
1164                                          : ALLREGS & ~(mAX|mDX);
1165                 scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1166                 getregs(cdb,mAX | mDX);     // trash these regs
1167                 reg_t rreg = findreg(rretregs);
1168                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg  (/5 IMUL if signed, /4 MUL if unsigned)
1169                 if (I64 && isbyte && rreg >= 4)
1170                     code_orrex(cdb.last(), REX);
1171                 fixresult(cdb,e,mAX,pretregs);
1172                 return;
1173             }
1174             else if (sz == 2 * REGSIZE)
1175             {
1176                 regm_t retregs = mDX | mAX;
1177                 codelem(cdb,e1,&retregs,false);           // eval left leaf
1178                 if (config.target_cpu >= TARGET_PentiumPro)
1179                 {
1180                     regm_t rretregs = allregs & ~retregs;           // second arg
1181                     scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue
1182                     regm_t rlo = findreglsw(rretregs);
1183                     regm_t rhi = findregmsw(rretregs);
1184                     /*  IMUL    rhi,EAX
1185                         IMUL    EDX,rlo
1186                         ADD     rhi,EDX
1187                         MUL     rlo
1188                         ADD     EDX,rhi
1189                      */
1190                     getregs(cdb,mAX|mDX|mask(rhi));
1191                     cdb.gen2(0x0FAF,modregrm(3,rhi,AX));    // IMUL rhi,EAX
1192                     cdb.gen2(0x0FAF,modregrm(3,DX,rlo));    // IMUL EDX,rlo
1193                     cdb.gen2(0x03,modregrm(3,rhi,DX));      // ADD  rhi,EDX
1194                     cdb.gen2(0xF7,modregrm(3,4,rlo));       // MUL  rlo
1195                     cdb.gen2(0x03,modregrm(3,DX,rhi));      // ADD  EDX,rhi
1196                     fixresult(cdb,e,mDX|mAX,pretregs);
1197                     return;
1198                 }
1199                 else
1200                 {
1201                     regm_t rretregs = mCX | mBX;           // second arg
1202                     scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1203                     callclib(cdb,e,CLIB.lmul,pretregs,0);
1204                     return;
1205                 }
1206             }
1207             assert(0);
1208 
1209         case OPvar:
1210             if (!I16 && sz <= REGSIZE)
1211             {
1212                 if (sz > 1)        // no byte version
1213                 {
1214                     // Generate IMUL r32,r/m32
1215                     regm_t retregs = *pretregs & (ALLREGS | mBP);
1216                     if (!retregs)
1217                         retregs = ALLREGS;
1218                     codelem(cdb,e1,&retregs,false);        // eval left leaf
1219                     regm_t resreg = retregs;
1220                     loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
1221                     freenode(e2);
1222                     fixresult(cdb,e,resreg,pretregs);
1223                     return;
1224                 }
1225             }
1226             else
1227             {
1228                 if (sz == 2 * REGSIZE)
1229                 {
1230                     if (e.EV.E1.Eoper != opunslng ||
1231                         e1.Ecount)
1232                         goto default;            // have to handle it with codelem()
1233 
                      // Left operand is an unsigned widening of a REGSIZE value:
                      // multiply it by each half of e2 and add the partial products
1234                     regm_t retregs = ALLREGS & ~(mAX | mDX);
1235                     codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
1236                     const reg = findreg(retregs);
1237                     getregs(cdb,mAX);
1238                     genmovreg(cdb,AX,reg);            // MOV AX,reg
1239                     loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
1240                     getregs(cdb,retregs);
1241                     cdb.gen1(0x90 + reg);                          // XCHG AX,reg
1242                     getregs(cdb,mAX | mDX);
1243                     if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
1244                         loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
1245                     else
1246                     {   getlvalue_lsw(&cs);
1247                         cdb.gen(&cs);                       // MUL EA
1248                     }
1249                     cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg
1250 
1251                     freenode(e1);
1252                     fixresult(cdb,e,mAX | mDX,pretregs);
1253                     return;
1254                 }
1255                 assert(sz <= REGSIZE);
1256             }
1257 
1258             // loadea() handles CWD or CLR DX for divides
1259             regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX;
1260             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1261             loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0,
1262                    mAX,
1263                    mAX | mDX);
1264             freenode(e2);
1265             fixresult(cdb,e,mAX,pretregs);
1266             return;
1267     }
1268     assert(0);
1269 }
1270 
1271 
1272 /*****************************
1273  * Handle divide, modulo and remquo.
1274  * Note that modulo isn't defined for doubles.
1275  */
1276 
1277 @trusted
1278 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1279 {
1280     //printf("cddiv()\n");
1281     elem *e1 = e.EV.E1;
1282     elem *e2 = e.EV.E2;
1283     if (*pretregs == 0)                         // if don't want result
1284     {
1285         codelem(cdb,e1,pretregs,false);      // eval left leaf
1286         *pretregs = 0;                          // in case they got set
1287         codelem(cdb,e2,pretregs,false);
1288         return;
1289     }
1290 
1291     //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1292     const tyml = tybasic(e1.Ety);
1293     const ty = tybasic(e.Ety);
1294     const oper = e.Eoper;
1295 
1296     if (tyfloating(tyml))
1297     {
1298         if (tyvector(tyml) ||
1299             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
1300             !(*pretregs & mST0) &&
1301             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
1302             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
1303             !(ty == TYllong)  //   or passing to function through integer register
1304            )
1305         {
1306             orthxmm(cdb,e,pretregs);
1307             return;
1308         }
1309         if (config.exe & EX_posix)
1310             orth87(cdb,e,pretregs);
1311         else
1312             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
1313 
1314         return;
1315     }
1316 
1317     if (tyxmmreg(tyml))
1318     {
1319         orthxmm(cdb,e,pretregs);
1320         return;
1321     }
1322 
1323     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
1324     const isbyte = tybyte(e.Ety) != 0;
1325     const sz = _tysize[tyml];
1326     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
1327     const uint grex = rex << 16;
1328 
1329     code cs = void;
1330     cs.Iflags = 0;
1331     cs.IFL2 = 0;
1332     cs.Irex = 0;
1333 
1334     switch (e2.Eoper)
1335     {
1336         case OPconst:
1337             auto d = cast(targ_size_t)el_tolong(e2);
1338             bool neg = false;
1339             const e2factor = d;
1340             if (!uns && cast(targ_llong)e2factor < 0)
1341             {   neg = true;
1342                 d = -d;
1343             }
1344 
1345             // Signed divide by a constant
1346             if ((d & (d - 1)) &&
1347                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1348                 config.flags4 & CFG4speed && !uns)
1349             {
1350                 /* R1 / 10
1351                  *
1352                  *  MOV     EAX,m
1353                  *  IMUL    R1
1354                  *  MOV     EAX,R1
1355                  *  SAR     EAX,31
1356                  *  SAR     EDX,shpost
1357                  *  SUB     EDX,EAX
1358                  *  IMUL    EAX,EDX,d
1359                  *  SUB     R1,EAX
1360                  *
1361                  * EDX = quotient
1362                  * R1 = remainder
1363                  */
1364                 assert(sz == 4 || sz == 8);
1365 
1366                 ulong m;
1367                 int shpost;
1368                 const int N = sz * 8;
1369                 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1370 
1371                 regm_t regm = allregs & ~(mAX | mDX);
1372                 codelem(cdb,e1,&regm,false);       // eval left leaf
1373                 const reg_t reg = findreg(regm);
1374                 getregs(cdb,regm | mDX | mAX);
1375 
1376                 /* Algorithm 5.2
1377                  * if m>=2**(N-1)
1378                  *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1379                  * else
1380                  *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1381                  * if (neg)
1382                  *    q = -q
1383                  */
1384                 const bool mgt = mhighbit || m >= (1UL << (N - 1));
1385                 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1386                 cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
1387                 if (mgt)
1388                     cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
1389                 getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1390                 genmovreg(cdb, AX, reg);                   // MOV EAX,R1
1391                 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1392                 if (shpost)
1393                     cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1394                 reg_t r3;
1395                 if (neg && oper == OPdiv)
1396                 {
1397                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1398                     r3 = AX;
1399                 }
1400                 else
1401                 {
1402                     cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1403                     r3 = DX;
1404                 }
1405 
1406                 // r3 is quotient
1407                 regm_t resregx;
1408                 switch (oper)
1409                 {   case OPdiv:
1410                         resregx = mask(r3);
1411                         break;
1412 
1413                     case OPmod:
1414                         assert(reg != AX && r3 == DX);
1415                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1416                         {
1417                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1418                         }
1419                         else
1420                         {
1421                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1422                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1423                             getregsNoSave(mAX);                             // EAX no longer contains 'd'
1424                         }
1425                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1426                         resregx = regm;
1427                         break;
1428 
1429                     case OPremquo:
1430                         assert(reg != AX && r3 == DX);
1431                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1432                         {
1433                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
1434                         }
1435                         else
1436                         {
1437                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1438                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1439                         }
1440                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1441                         genmovreg(cdb, AX, r3);                // MOV EAX,r3
1442                         if (neg)
1443                             cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
1444                         genmovreg(cdb, DX, reg);               // MOV EDX,R1
1445                         resregx = mDX | mAX;
1446                         break;
1447 
1448                     default:
1449                         assert(0);
1450                 }
1451                 freenode(e2);
1452                 fixresult(cdb,e,resregx,pretregs);
1453                 return;
1454             }
1455 
1456             // Unsigned divide by a constant
1457             if (e2factor > 2 && (e2factor & (e2factor - 1)) &&
1458                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1459                 config.flags4 & CFG4speed && uns)
1460             {
1461                 assert(sz == 4 || sz == 8);
1462 
1463                 reg_t r3;
1464                 regm_t regm;
1465                 reg_t reg;
1466                 ulong m;
1467                 int shpre;
1468                 int shpost;
1469                 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1470                 {
1471                     /* t1 = MULUH(m, n)
1472                      * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1473                      *   MOV   EAX,reg
1474                      *   MOV   EDX,m
1475                      *   MUL   EDX
1476                      *   MOV   EAX,reg
1477                      *   SUB   EAX,EDX
1478                      *   SHR   EAX,1
1479                      *   LEA   R3,[EAX][EDX]
1480                      *   SHR   R3,shpost-1
1481                      */
1482                     assert(shpre == 0);
1483 
1484                     regm = allregs & ~(mAX | mDX);
1485                     codelem(cdb,e1,&regm,false);       // eval left leaf
1486                     reg = findreg(regm);
1487                     getregs(cdb,mAX | mDX);
1488                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1489                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1490                     getregs(cdb,regm | mDX | mAX);
1491                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1492                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1493                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1494                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1495                     regm_t regm3 = allregs;
1496                     if (oper == OPmod || oper == OPremquo)
1497                     {
1498                         regm3 &= ~regm;
1499                         if (oper == OPremquo || !el_signx32(e2))
1500                             regm3 &= ~mAX;
1501                     }
1502                     allocreg(cdb,&regm3,&r3,TYint);
1503                     cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1504                     if (shpost != 1)
1505                         cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1506                 }
1507                 else
1508                 {
1509                     /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1510                      *   SHR   EAX,shpre
1511                      *   MOV   reg,m
1512                      *   MUL   reg
1513                      *   SHR   EDX,shpost
1514                      */
1515                     regm = mAX;
1516                     if (oper == OPmod || oper == OPremquo)
1517                         regm = allregs & ~(mAX|mDX);
1518                     codelem(cdb,e1,&regm,false);       // eval left leaf
1519                     reg = findreg(regm);
1520 
1521                     if (reg != AX)
1522                     {
1523                         getregs(cdb,mAX);
1524                         genmovreg(cdb,AX,reg);                 // MOV EAX,reg
1525                     }
1526                     if (shpre)
1527                     {
1528                         getregs(cdb,mAX);
1529                         cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1530                     }
1531                     getregs(cdb,mDX);
1532                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1533                     getregs(cdb,mDX | mAX);
1534                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1535                     if (shpost)
1536                         cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1537                     r3 = DX;
1538                 }
1539 
1540                 regm_t resreg;
1541                 switch (oper)
1542                 {   case OPdiv:
1543                         // r3 = quotient
1544                         resreg = mask(r3);
1545                         break;
1546 
1547                     case OPmod:
1548                         /* reg = original value
1549                          * r3  = quotient
1550                          */
1551                         assert(!(regm & mAX));
1552                         if (el_signx32(e2))
1553                         {
1554                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1555                         }
1556                         else
1557                         {
1558                             assert(!(mask(r3) & mAX));
1559                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1560                             getregs(cdb,mAX);
1561                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1562                         }
1563                         getregs(cdb,regm);
1564                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1565                         resreg = regm;
1566                         break;
1567 
1568                     case OPremquo:
1569                         /* reg = original value
1570                          * r3  = quotient
1571                          */
1572                         assert(!(mask(r3) & (mAX|regm)));
1573                         assert(!(regm & mAX));
1574                         if (el_signx32(e2))
1575                         {
1576                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1577                         }
1578                         else
1579                         {
1580                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
1581                             getregs(cdb,mAX);
1582                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1583                         }
1584                         getregs(cdb,regm);
1585                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1586                         genmovreg(cdb, AX, r3);              // MOV EAX,r3
1587                         genmovreg(cdb, DX, reg);             // MOV EDX,reg
1588                         resreg = mDX | mAX;
1589                         break;
1590 
1591                     default:
1592                         assert(0);
1593                 }
1594                 freenode(e2);
1595                 fixresult(cdb,e,resreg,pretregs);
1596                 return;
1597             }
1598 
1599             const int pow2 = ispow2(e2factor);
1600 
1601             // Register pair signed divide by power of 2
1602             if (sz == REGSIZE * 2 &&
1603                 (oper == OPdiv) && !uns &&
1604                 pow2 != -1 &&
1605                 I32 // not set up for I64 cent yet
1606                )
1607             {
1608                 regm_t retregs = mDX | mAX;
1609                 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW))
1610                     retregs = (retregs & mMSW) | (BYTEREGS & mLSW);  // because of SETZ
1611 
1612                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1613                 const rhi = findregmsw(retregs);
1614                 const rlo = findreglsw(retregs);
1615                 freenode(e2);
1616                 getregs(cdb,retregs);
1617 
1618                 if (pow2 < 32)
1619                 {
1620                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1621 
1622                     genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
1623                     if (pow2 == 1)
1624                         cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
1625                     else
1626                     {
1627                         cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
1628                         cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
1629                     }
1630                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
1631                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
1632                     cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
1633                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
1634                 }
1635                 else if (pow2 == 32)
1636                 {
1637                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1638 
1639                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1640                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1641                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
1642                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
1643                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1644                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1645                 }
1646                 else if (pow2 < 63)
1647                 {
1648                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1649                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1650 
1651                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1652                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1653                     cdb.genmovreg(r2,r1);                                         // MOV r2,r1
1654 
1655                     if (pow2 == 33)
1656                     {
1657                         cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
1658                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
1659                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
1660                     }
1661                     else
1662                     {
1663                         cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
1664                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
1665                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
1666                     }
1667 
1668                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1669                     cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
1670                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1671                 }
1672                 else
1673                 {
1674                     // This may be better done by cgelem.d
1675                     assert(pow2 == 63);
1676                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
1677                     cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
1678                     cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
1679                     cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
1680                     movregconst(cdb,rhi,0,0);                              // MOV rhi,0
1681                 }
1682 
1683                 fixresult(cdb,e,retregs,pretregs);
1684                 return;
1685             }
1686 
1687             // Register pair signed modulo by power of 2
1688             if (sz == REGSIZE * 2 &&
1689                 (oper == OPmod) && !uns &&
1690                 pow2 != -1 &&
1691                 I32 // not set up for I64 cent yet
1692                )
1693             {
1694                 regm_t retregs = mDX | mAX;
1695                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1696                 const rhi = findregmsw(retregs);
1697                 const rlo = findreglsw(retregs);
1698                 freenode(e2);
1699                 getregs(cdb,retregs);
1700 
1701                 regm_t scratchm = allregs & ~retregs;
1702                 if (pow2 == 63)
1703                     scratchm &= BYTEREGS;               // because of SETZ
1704                 reg_t r1 = allocScratchReg(cdb, scratchm);
1705 
1706                 if (pow2 < 32)
1707                 {
1708                     cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
1709                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
1710                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1711                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1712                     cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
1713                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1714                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1715                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
1716                 }
1717                 else if (pow2 == 32)
1718                 {
1719                     cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
1720                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
1721                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
1722                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
1723                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
1724                 }
1725                 else if (pow2 < 63)
1726                 {
1727                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1728 
1729                     cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
1730                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
1731                     cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
1732                     cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
1733                     cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
1734                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
1735                     cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
1736                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
1737                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
1738                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
1739                 }
1740                 else
1741                 {
1742                     // This may be better done by cgelem.d
1743                     assert(pow2 == 63);
1744 
1745                     cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
1746                     cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
1747                     cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
1748                     cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
1749                     cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
1750                 }
1751 
1752                 fixresult(cdb,e,retregs,pretregs);
1753                 return;
1754             }
1755 
1756             if (sz > REGSIZE || !el_signx32(e2))
1757                 goto default;
1758 
1759             // Special code for signed divide or modulo by power of 2
1760             if ((sz == REGSIZE || (I64 && sz == 4)) &&
1761                 (oper == OPdiv || oper == OPmod) && !uns &&
1762                 pow2 != -1 &&
1763                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
1764                )
1765             {
1766                 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
1767                 {
1768                     /* MOV r,reg
1769                        SHR r,31
1770                        ADD reg,r
1771                        SAR reg,1
1772                      */
1773                     regm_t retregs = allregs;
1774                     codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1775                     const reg = findreg(retregs);
1776                     freenode(e2);
1777                     getregs(cdb,retregs);
1778 
1779                     reg_t r = allocScratchReg(cdb, allregs & ~retregs);
1780                     genmovreg(cdb,r,reg);                        // MOV r,reg
1781                     cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1782                     cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
1783                     cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
1784                     regm_t resreg = retregs;
1785                     fixresult(cdb,e,resreg,pretregs);
1786                     return;
1787                 }
1788 
1789                 regm_t resreg;
1790                 switch (oper)
1791                 {
1792                     case OPdiv:
1793                         resreg = mAX;
1794                         break;
1795 
1796                     case OPmod:
1797                         resreg = mDX;
1798                         break;
1799 
1800                     case OPremquo:
1801                         resreg = mDX | mAX;
1802                         break;
1803 
1804                     default:
1805                         assert(0);
1806                 }
1807 
1808                 regm_t retregs = mAX;
1809                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1810                 freenode(e2);
1811                 getregs(cdb,mAX | mDX);             // modify these regs
1812                 cdb.gen1(0x99);                             // CWD
1813                 code_orrex(cdb.last(), rex);
1814                 if (pow2 == 1)
1815                 {
1816                     if (oper == OPdiv)
1817                     {
1818                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
1819                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
1820                     }
1821                     else // OPmod
1822                     {
1823                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));   // XOR AX,DX
1824                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1
1825                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));   // ADD DX,AX
1826                     }
1827                 }
1828                 else
1829                 {   targ_ulong m;
1830 
1831                     m = (1 << pow2) - 1;
1832                     if (oper == OPdiv)
1833                     {
1834                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
1835                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
1836                         // Be careful not to generate this for 8088
1837                         assert(config.target_cpu >= TARGET_80286);
1838                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
1839                     }
1840                     else // OPmod
1841                     {
1842                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1843                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1844                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
1845                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1846                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1847                         resreg = mAX;
1848                     }
1849                 }
1850                 fixresult(cdb,e,resreg,pretregs);
1851                 return;
1852             }
1853             goto default;
1854 
1855         case OPind:
1856             if (!e2.Ecount)                        // if not CSE
1857                     goto case OPvar;                        // try OP reg,EA
1858             goto default;
1859 
1860         default:                                    // OPconst and operators
1861             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1862             regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX;
1863             codelem(cdb,e1,&retregs,false);           // eval left leaf
1864             regm_t rretregs;
1865             if (sz <= REGSIZE)                  // dedicated regs for div
1866             {
1867                 // pick some other regs
1868                 rretregs = isbyte ? BYTEREGS & ~mAX
1869                                 : ALLREGS & ~(mAX|mDX);
1870             }
1871             else
1872             {
1873                 assert(sz <= 2 * REGSIZE);
1874                 rretregs = mCX | mBX;           // second arg
1875             }
1876             scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1877             if (sz <= REGSIZE)
1878             {
1879                 getregs(cdb,mAX | mDX);     // trash these regs
1880                 if (uns)                        // unsigned divide
1881                 {
1882                     movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
1883                     getregs(cdb,mDX);
1884                 }
1885                 else
1886                 {
1887                     cdb.gen1(0x99);                 // CWD
1888                     code_orrex(cdb.last(),rex);
1889                 }
1890                 reg_t rreg = findreg(rretregs);
1891                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg
1892                 if (I64 && isbyte && rreg >= 4)
1893                     code_orrex(cdb.last(), REX);
1894                 regm_t resreg;
1895                 switch (oper)
1896                 {
1897                     case OPdiv:
1898                         resreg = mAX;
1899                         break;
1900 
1901                     case OPmod:
1902                         resreg = mDX;
1903                         break;
1904 
1905                     case OPremquo:
1906                         resreg = mDX | mAX;
1907                         break;
1908 
1909                     default:
1910                         assert(0);
1911                 }
1912                 fixresult(cdb,e,resreg,pretregs);
1913             }
1914             else if (sz == 2 * REGSIZE)
1915             {
1916                 uint lib;
1917                 switch (oper)
1918                 {
1919                     case OPdiv:
1920                     case OPremquo:
1921                         lib = uns ? CLIB.uldiv : CLIB.ldiv;
1922                         break;
1923 
1924                     case OPmod:
1925                         lib = uns ? CLIB.ulmod : CLIB.lmod;
1926                         break;
1927 
1928                     default:
1929                         assert(0);
1930                 }
1931 
1932                 regm_t keepregs = I32 ? mSI | mDI : 0;
1933                 callclib(cdb,e,lib,pretregs,keepregs);
1934             }
1935             else
1936                     assert(0);
1937             return;
1938 
1939         case OPvar:
1940             if (I16 || sz == 2 * REGSIZE)
1941                 goto default;            // have to handle it with codelem()
1942 
1943             // loadea() handles CWD or CLR DX for divides
1944             regm_t retregs = mAX;
1945             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1946             loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0,
1947                    mAX | mDX,
1948                    mAX | mDX);
1949             freenode(e2);
1950             regm_t resreg;
1951             switch (oper)
1952             {
1953                 case OPdiv:
1954                     resreg = mAX;
1955                     break;
1956 
1957                 case OPmod:
1958                     resreg = mDX;
1959                     break;
1960 
1961                 case OPremquo:
1962                     resreg = mDX | mAX;
1963                     break;
1964 
1965                 default:
1966                     assert(0);
1967             }
1968             fixresult(cdb,e,resreg,pretregs);
1969             return;
1970     }
1971     assert(0);
1972 }
1973 
1974 
1975 /***************************
1976  * Handle OPnot and OPbool.
1977  * Generate:
1978  *      c:      [evaluate e1]
1979  *      cfalse: [save reg code]
1980  *              clr     reg
1981  *              jmp     cnop
1982  *      ctrue:  [save reg code]
1983  *              clr     reg
1984  *              inc     reg
1985  *      cnop:   nop
1986  */
1987 
1988 @trusted
1989 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1990 {
1991     //printf("cdnot()\n");
1992     reg_t reg;
1993     tym_t forflags;
1994     regm_t retregs;
1995     elem *e1 = e.EV.E1;
1996 
1997     if (*pretregs == 0)
1998         goto L1;
1999     if (*pretregs == mPSW)
2000     {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
2001     L1:
2002         codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
2003         return;
2004     }
2005 
2006     OPER op = e.Eoper;
2007     uint sz = tysize(e1.Ety);
2008     uint rex = (I64 && sz == 8) ? REX_W : 0;
2009     uint grex = rex << 16;
2010 
2011     if (!tyfloating(e1.Ety))
2012     {
2013     if (sz <= REGSIZE && e1.Eoper == OPvar)
2014     {   code cs;
2015 
2016         getlvalue(cdb,&cs,e1,0);
2017         freenode(e1);
2018         if (!I16 && sz == 2)
2019             cs.Iflags |= CFopsize;
2020 
2021         retregs = *pretregs & (ALLREGS | mBP);
2022         if (config.target_cpu >= TARGET_80486 &&
2023             tysize(e.Ety) == 1)
2024         {
2025             if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,reg))
2026             {
2027                 cs.Iop = 0x39;
2028                 if (I64 && (sz == 1) && reg >= 4)
2029                     cs.Irex |= REX;
2030             }
2031             else
2032             {   cs.Iop = 0x81;
2033                 reg = 7;
2034                 cs.IFL2 = FLconst;
2035                 cs.IEV2.Vint = 0;
2036             }
2037             cs.Iop ^= (sz == 1);
2038             code_newreg(&cs,reg);
2039             cdb.gen(&cs);                             // CMP e1,0
2040 
2041             retregs &= BYTEREGS;
2042             if (!retregs)
2043                 retregs = BYTEREGS;
2044             allocreg(cdb,&retregs,&reg,TYint);
2045 
2046             const opcode_t iop = (op == OPbool)
2047                 ? 0x0F95    // SETNZ rm8
2048                 : 0x0F94;   // SETZ rm8
2049             cdb.gen2(iop, modregrmx(3,0,reg));
2050             if (reg >= 4)
2051                 code_orrex(cdb.last(), REX);
2052             if (op == OPbool)
2053                 *pretregs &= ~mPSW;
2054             goto L4;
2055         }
2056 
2057         if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,reg))
2058             cs.Iop = 0x39;
2059         else
2060         {   cs.Iop = 0x81;
2061             reg = 7;
2062             cs.IFL2 = FLconst;
2063             cs.IEV2.Vint = 1;
2064         }
2065         if (I64 && (sz == 1) && reg >= 4)
2066             cs.Irex |= REX;
2067         cs.Iop ^= (sz == 1);
2068         code_newreg(&cs,reg);
2069         cdb.gen(&cs);                         // CMP e1,1
2070 
2071         allocreg(cdb,&retregs,&reg,TYint);
2072         op ^= (OPbool ^ OPnot);                 // switch operators
2073         goto L2;
2074     }
2075     else if (config.target_cpu >= TARGET_80486 &&
2076         tysize(e.Ety) == 1)
2077     {
2078         int jop = jmpopcode(e.EV.E1);
2079         retregs = mPSW;
2080         codelem(cdb,e.EV.E1,&retregs,false);
2081         retregs = *pretregs & BYTEREGS;
2082         if (!retregs)
2083             retregs = BYTEREGS;
2084         allocreg(cdb,&retregs,&reg,TYint);
2085 
2086         int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
2087         if (op == OPnot)
2088             iop ^= 1;
2089         cdb.gen2(iop,grex | modregrmx(3,0,reg));
2090         if (reg >= 4)
2091             code_orrex(cdb.last(), REX);
2092         if (op == OPbool)
2093             *pretregs &= ~mPSW;
2094         goto L4;
2095     }
2096     else if (sz <= REGSIZE &&
2097         // NEG bytereg is too expensive
2098         (sz != 1 || config.target_cpu < TARGET_PentiumPro))
2099     {
2100         retregs = *pretregs & (ALLREGS | mBP);
2101         if (sz == 1 && !(retregs &= BYTEREGS))
2102             retregs = BYTEREGS;
2103         codelem(cdb,e.EV.E1,&retregs,false);
2104         reg = findreg(retregs);
2105         getregs(cdb,retregs);
2106         cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
2107         code_orflag(cdb.last(),CFpsw);
2108         if (!I16 && sz == SHORTSIZE)
2109             code_orflag(cdb.last(),CFopsize);
2110     L2:
2111         genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
2112         code_orrex(cdb.last(), rex);
2113         // At this point, reg==0 if e1==0, reg==-1 if e1!=0
2114         if (op == OPnot)
2115         {
2116             if (I64)
2117                 cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
2118             else
2119                 cdb.gen1(0x40 + reg);                        // INC reg
2120         }
2121         else
2122             cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
2123         if (*pretregs & mPSW)
2124         {   code_orflag(cdb.last(),CFpsw);
2125             *pretregs &= ~mPSW;         // flags are always set anyway
2126         }
2127     L4:
2128         fixresult(cdb,e,retregs,pretregs);
2129         return;
2130     }
2131     }
2132     code *cnop = gennop(null);
2133     code *ctrue = gennop(null);
2134     logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue);
2135     forflags = *pretregs & mPSW;
2136     if (I64 && sz == 8)
2137         forflags |= 64;
2138     assert(tysize(e.Ety) <= REGSIZE);              // result better be int
2139     CodeBuilder cdbfalse;
2140     cdbfalse.ctor();
2141     allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
2142     code *cfalse = cdbfalse.finish();
2143     CodeBuilder cdbtrue;
2144     cdbtrue.ctor();
2145     cdbtrue.append(ctrue);
2146     for (code *c1 = cfalse; c1; c1 = code_next(c1))
2147         cdbtrue.gen(c1);                                      // duplicate reg save code
2148     CodeBuilder cdbfalse2;
2149     cdbfalse2.ctor();
2150     movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
2151     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2152     movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
2153     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2154     genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
2155     cdb.append(cfalse);
2156     cdb.append(cdbfalse2);
2157     cdb.append(cdbtrue);
2158     cdb.append(cnop);
2159 }
2160 
2161 
2162 /************************
2163  * Complement operator
2164  */
2165 
2166 @trusted
2167 void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2168 {
2169     if (*pretregs == 0)
2170     {
2171         codelem(cdb,e.EV.E1,pretregs,false);
2172         return;
2173     }
2174     tym_t tym = tybasic(e.Ety);
2175     int sz = _tysize[tym];
2176     uint rex = (I64 && sz == 8) ? REX_W : 0;
2177     regm_t possregs = (sz == 1) ? BYTEREGS : allregs;
2178     regm_t retregs = *pretregs & possregs;
2179     if (retregs == 0)
2180         retregs = possregs;
2181     codelem(cdb,e.EV.E1,&retregs,false);
2182     getregs(cdb,retregs);                // retregs will be destroyed
2183 
2184     if (0 && sz == 4 * REGSIZE)
2185     {
2186         cdb.gen2(0xF7,modregrm(3,2,AX));   // NOT AX
2187         cdb.gen2(0xF7,modregrm(3,2,BX));   // NOT BX
2188         cdb.gen2(0xF7,modregrm(3,2,CX));   // NOT CX
2189         cdb.gen2(0xF7,modregrm(3,2,DX));   // NOT DX
2190     }
2191     else
2192     {
2193         const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
2194         const op = (sz == 1) ? 0xF6 : 0xF7;
2195         genregs(cdb,op,2,reg);     // NOT reg
2196         code_orrex(cdb.last(), rex);
2197         if (I64 && sz == 1 && reg >= 4)
2198             code_orrex(cdb.last(), REX);
2199         if (sz == 2 * REGSIZE)
2200         {
2201             const reg2 = findreglsw(retregs);
2202             genregs(cdb,op,2,reg2);  // NOT reg+1
2203         }
2204     }
2205     fixresult(cdb,e,retregs,pretregs);
2206 }
2207 
2208 /************************
2209  * Bswap operator
2210  */
2211 
2212 @trusted
2213 void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2214 {
2215     if (*pretregs == 0)
2216     {
2217         codelem(cdb,e.EV.E1,pretregs,false);
2218         return;
2219     }
2220 
2221     const tym = tybasic(e.Ety);
2222     const sz = _tysize[tym];
2223     const posregs = (sz == 2) ? mAX|mBX|mCX|mDX : allregs;
2224     regm_t retregs = *pretregs & posregs;
2225     if (retregs == 0)
2226         retregs = posregs;
2227     codelem(cdb,e.EV.E1,&retregs,false);
2228     getregs(cdb,retregs);        // retregs will be destroyed
2229     if (sz == 2 * REGSIZE)
2230     {
2231         assert(sz != 16);                       // no cent support yet
2232         const msreg = findregmsw(retregs);
2233         cdb.gen1(0x0FC8 + (msreg & 7));         // BSWAP msreg
2234         const lsreg = findreglsw(retregs);
2235         cdb.gen1(0x0FC8 + (lsreg & 7));         // BSWAP lsreg
2236         cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg
2237     }
2238     else
2239     {
2240         const reg = findreg(retregs);
2241         if (sz == 2)
2242         {
2243             genregs(cdb,0x86,reg+4,reg);    // XCHG regL,regH
2244         }
2245         else
2246         {
2247             assert(sz == 4 || sz == 8);
2248             cdb.gen1(0x0FC8 + (reg & 7));      // BSWAP reg
2249             ubyte rex = 0;
2250             if (sz == 8)
2251                 rex |= REX_W;
2252             if (reg & 8)
2253                 rex |= REX_B;
2254             if (rex)
2255                 code_orrex(cdb.last(), rex);
2256         }
2257     }
2258     fixresult(cdb,e,retregs,pretregs);
2259 }
2260 
2261 /*************************
2262  * ?: operator
2263  */
2264 
2265 @trusted
2266 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2267 {
2268     con_t regconold,regconsave;
2269     uint stackpushold,stackpushsave;
2270     int ehindexold,ehindexsave;
2271     uint sz2;
2272 
2273     /* vars to save state of 8087 */
2274     int stackusedold,stackusedsave;
2275     NDP[global87.stack.length] _8087old;
2276     NDP[global87.stack.length] _8087save;
2277 
2278     //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2279     elem *e1 = e.EV.E1;
2280     elem *e2 = e.EV.E2;
2281     elem *e21 = e2.EV.E1;
2282     elem *e22 = e2.EV.E2;
2283     regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
2284     const op1 = e1.Eoper;
2285     uint sz1 = tysize(e1.Ety);
2286     uint jop = jmpopcode(e1);
2287 
2288     uint jop1 = jmpopcode(e21);
2289     uint jop2 = jmpopcode(e22);
2290 
2291     docommas(cdb,e1);
2292     cgstate.stackclean++;
2293 
2294     if (!OTrel(op1) && e1 == e21 &&
2295         sz1 <= REGSIZE && !tyfloating(e1.Ety))
2296     {   // Recognize (e ? e : f)
2297 
2298         code *cnop1 = gennop(null);
2299         regm_t retregs = *pretregs | mPSW;
2300         codelem(cdb,e1,&retregs,false);
2301 
2302         cse_flush(cdb,1);                // flush CSEs to memory
2303         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2304         freenode(e21);
2305 
2306         regconsave = regcon;
2307         stackpushsave = stackpush;
2308 
2309         retregs |= psw;
2310         if (retregs & (mBP | ALLREGS))
2311             regimmed_set(findreg(retregs),0);
2312         codelem(cdb,e22,&retregs,false);
2313 
2314         andregcon(regconsave);
2315         assert(stackpushsave == stackpush);
2316 
2317         *pretregs = retregs;
2318         freenode(e2);
2319         cdb.append(cnop1);
2320         cgstate.stackclean--;
2321         return;
2322     }
2323 
2324     if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
2325         !e1.Ecount &&
2326         (jop == JC || jop == JNC) &&
2327         (sz2 = tysize(e2.Ety)) <= REGSIZE &&
2328         e21.Eoper == OPconst &&
2329         e22.Eoper == OPconst
2330        )
2331     {
2332         uint sz = tysize(e.Ety);
2333         uint rex = (I64 && sz == 8) ? REX_W : 0;
2334         uint grex = rex << 16;
2335 
2336         regm_t retregs;
2337         targ_size_t v1,v2;
2338 
2339         if (sz2 != 1 || I64)
2340         {
2341             retregs = *pretregs & (ALLREGS | mBP);
2342             if (!retregs)
2343                 retregs = ALLREGS;
2344         }
2345         else
2346         {
2347             retregs = *pretregs & BYTEREGS;
2348             if (!retregs)
2349                 retregs = BYTEREGS;
2350         }
2351 
2352         cdcmp_flag = 1 | rex;
2353         v1 = cast(targ_size_t)e21.EV.Vllong;
2354         v2 = cast(targ_size_t)e22.EV.Vllong;
2355         if (jop == JNC)
2356         {   v1 = v2;
2357             v2 = cast(targ_size_t)e21.EV.Vllong;
2358         }
2359 
2360         opcode_t opcode = 0x81;
2361         switch (sz2)
2362         {   case 1:     opcode--;
2363                         v1 = cast(byte) v1;
2364                         v2 = cast(byte) v2;
2365                         break;
2366 
2367             case 2:     v1 = cast(short) v1;
2368                         v2 = cast(short) v2;
2369                         break;
2370 
2371             case 4:     v1 = cast(int) v1;
2372                         v2 = cast(int) v2;
2373                         break;
2374             default:
2375                         break;
2376         }
2377 
2378         if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
2379         {
2380             // only zero-extension from 32-bits is available for 'or'
2381         }
2382         else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
2383         {
2384             // only sign-extension from 32-bits is available for 'and'
2385         }
2386         else
2387         {
2388             codelem(cdb,e1,&retregs,false);
2389             const reg = findreg(retregs);
2390 
2391             if (v1 == 0 && v2 == ~cast(targ_size_t)0)
2392             {
2393                 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
2394                 if (I64 && sz2 == REGSIZE)
2395                     code_orrex(cdb.last(), REX_W);
2396                 if (I64 && sz2 == 1 && reg >= 4)
2397                     code_orrex(cdb.last(), REX);
2398             }
2399             else
2400             {
2401                 v1 -= v2;
2402                 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
2403                 if (I64 && sz2 == 1 && reg >= 4)
2404                     code_orrex(cdb.last(), REX);
2405                 if (v2 == 1 && !I64)
2406                     cdb.gen1(0x40 + reg);                     // INC reg
2407                 else if (v2 == -1L && !I64)
2408                     cdb.gen1(0x48 + reg);                     // DEC reg
2409                 else
2410                 {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
2411                     if (I64 && sz2 == 1 && reg >= 4)
2412                         code_orrex(cdb.last(), REX);
2413                 }
2414             }
2415 
2416             freenode(e21);
2417             freenode(e22);
2418             freenode(e2);
2419 
2420             fixresult(cdb,e,retregs,pretregs);
2421             cgstate.stackclean--;
2422             return;
2423         }
2424     }
2425 
2426     if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
2427         op1 != OPnot && op1 != OPbool &&
2428         e21.Eoper == OPconst &&
2429         sz1 <= REGSIZE &&
2430         *pretregs & (mBP | ALLREGS) &&
2431         tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
2432     {   // Recognize (e ? c : f)
2433 
2434         code *cnop1 = gennop(null);
2435         regm_t retregs = mPSW;
2436         jop = jmpopcode(e1);            // get jmp condition
2437         codelem(cdb,e1,&retregs,false);
2438 
2439         // Set the register with e21 without affecting the flags
2440         retregs = *pretregs & (ALLREGS | mBP);
2441         if (retregs & ~regcon.mvar)
2442             retregs &= ~regcon.mvar;    // don't disturb register variables
2443         // NOTE: see my email (sign extension bug? possible fix, some questions
2444         reg_t reg;
2445         regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
2446         retregs = mask(reg);
2447 
2448         cse_flush(cdb,1);                // flush CSE's to memory
2449         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2450         freenode(e21);
2451 
2452         regconsave = regcon;
2453         stackpushsave = stackpush;
2454 
2455         codelem(cdb,e22,&retregs,false);
2456 
2457         andregcon(regconsave);
2458         assert(stackpushsave == stackpush);
2459 
2460         freenode(e2);
2461         cdb.append(cnop1);
2462         fixresult(cdb,e,retregs,pretregs);
2463         cgstate.stackclean--;
2464         return;
2465     }
2466 
2467     code *cnop1 = gennop(null);
2468     code *cnop2 = gennop(null);         // dummy target addresses
2469     logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
2470     regconold = regcon;
2471     stackusedold = global87.stackused;
2472     stackpushold = stackpush;
2473     memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
2474     regm_t retregs = *pretregs;
2475     CodeBuilder cdb1;
2476     cdb1.ctor();
2477     if (psw && jop1 != JNE)
2478     {
2479         retregs &= ~mPSW;
2480         if (!retregs)
2481             retregs = ALLREGS;
2482         codelem(cdb1,e21,&retregs,false);
2483         fixresult(cdb1,e21,retregs,pretregs);
2484     }
2485     else
2486         codelem(cdb1,e21,&retregs,false);
2487 
2488     if (CPP && e2.Eoper == OPcolon2)
2489     {
2490         code cs;
2491 
2492         // This is necessary so that any cleanup code on one branch
2493         // is redone on the other branch.
2494         cs.Iop = ESCAPE | ESCmark2;
2495         cs.Iflags = 0;
2496         cs.Irex = 0;
2497         cdb.gen(&cs);
2498         cdb.append(cdb1);
2499         cs.Iop = ESCAPE | ESCrelease2;
2500         cdb.gen(&cs);
2501     }
2502     else
2503         cdb.append(cdb1);
2504 
2505     regconsave = regcon;
2506     regcon = regconold;
2507 
2508     stackpushsave = stackpush;
2509     stackpush = stackpushold;
2510 
2511     stackusedsave = global87.stackused;
2512     global87.stackused = stackusedold;
2513 
2514     memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
2515     memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);
2516 
2517     retregs |= psw;                     // PSW bit may have been trashed
2518     *pretregs |= psw;
2519     CodeBuilder cdb2;
2520     cdb2.ctor();
2521     if (psw && jop2 != JNE)
2522     {
2523         retregs &= ~mPSW;
2524         if (!retregs)
2525             retregs = ALLREGS;
2526         codelem(cdb2,e22,&retregs,false);
2527         fixresult(cdb2,e22,retregs,pretregs);
2528     }
2529     else
2530         codelem(cdb2,e22,&retregs,false);   // use same regs as E1
2531     *pretregs = retregs | psw;
2532     andregcon(regconold);
2533     andregcon(regconsave);
2534     assert(global87.stackused == stackusedsave);
2535     assert(stackpush == stackpushsave);
2536     memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
2537     freenode(e2);
2538     genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
2539     cdb.append(cnop1);
2540     cdb.append(cdb2);
2541     cdb.append(cnop2);
2542     if (*pretregs & mST0)
2543         note87(e,0,0);
2544 
2545     cgstate.stackclean--;
2546 }
2547 
2548 /*********************
2549  * Comma operator OPcomma
2550  */
2551 
2552 @trusted
2553 void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2554 {
2555     regm_t retregs = 0;
2556     codelem(cdb,e.EV.E1,&retregs,false);   // ignore value from left leaf
2557     codelem(cdb,e.EV.E2,pretregs,false);   // do right leaf
2558 }
2559 
2560 
2561 /*********************************
2562  * Do && and || operators.
2563  * Generate:
2564  *              (evaluate e1 and e2, if true goto cnop1)
2565  *      cnop3:  NOP
2566  *      cg:     [save reg code]         ;if we must preserve reg
2567  *              CLR     reg             ;false result (set Z also)
2568  *              JMP     cnop2
2569  *
2570  *      cnop1:  NOP                     ;if e1 evaluates to true
2571  *              [save reg code]         ;preserve reg
2572  *
2573  *              MOV     reg,1           ;true result
2574  *                  or
2575  *              CLR     reg             ;if return result in flags
2576  *              INC     reg
2577  *
2578  *      cnop2:  NOP                     ;mark end of code
2579  */
2580 
2581 @trusted
2582 void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2583 {
2584     /* We can trip the assert with the following:
2585      *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
2586      * We'll generate ugly code for it, but it's too obscure a case
2587      * to expend much effort on it.
2588      * assert(*pretregs != mPSW);
2589      */
2590 
2591     //printf("cdloglog() *pretregs: %s\n", regm_str(*pretregs));
2592     cgstate.stackclean++;
2593     code *cnop1 = gennop(null);
2594     CodeBuilder cdb1;
2595     cdb1.ctor();
2596     cdb1.append(cnop1);
2597     code *cnop3 = gennop(null);
2598     elem *e2 = e.EV.E2;
2599     (e.Eoper == OPoror)
2600         ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
2601         : logexp(cdb,e.EV.E1,0,FLcode,cnop3);
2602     con_t regconsave = regcon;
2603     uint stackpushsave = stackpush;
2604     if (*pretregs == 0)                 // if don't want result
2605     {
2606         int noreturn = !el_returns(e2);
2607         codelem(cdb,e2,pretregs,false);
2608         if (noreturn)
2609         {
2610             regconsave.used |= regcon.used;
2611             regcon = regconsave;
2612         }
2613         else
2614             andregcon(regconsave);
2615         assert(stackpush == stackpushsave);
2616         cdb.append(cnop3);
2617         cdb.append(cdb1);        // eval code, throw away result
2618         cgstate.stackclean--;
2619         return;
2620     }
2621 
2622     if (tybasic(e2.Ety) == TYnoreturn)
2623     {
2624         regm_t retregs2 = 0;
2625         codelem(cdb, e2, &retregs2, false);
2626         regconsave.used |= regcon.used;
2627         regcon = regconsave;
2628         assert(stackpush == stackpushsave);
2629 
2630         regm_t retregs = *pretregs & (ALLREGS | mBP);
2631         if (!retregs)
2632             retregs = ALLREGS;                                   // if mPSW only
2633 
2634         reg_t reg;
2635         allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
2636         movregconst(cdb1,reg,e.Eoper == OPoror,*pretregs & mPSW);
2637         regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
2638         *pretregs = retregs;
2639 
2640         cdb.append(cnop3);
2641         cdb.append(cdb1);        // eval code, throw away result
2642         cgstate.stackclean--;
2643         return;
2644     }
2645 
2646     code *cnop2 = gennop(null);
2647     uint sz = tysize(e.Ety);
2648     if (tybasic(e2.Ety) == TYbool &&
2649       sz == tysize(e2.Ety) &&
2650       !(*pretregs & mPSW) &&
2651       e2.Eoper == OPcall)
2652     {
2653         codelem(cdb,e2,pretregs,false);
2654 
2655         andregcon(regconsave);
2656 
2657         // stack depth should not change when evaluating E2
2658         assert(stackpush == stackpushsave);
2659 
2660         assert(sz <= 4);                                        // result better be int
2661         regm_t retregs = *pretregs & allregs;
2662         reg_t reg;
2663         allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
2664         movregconst(cdb1,reg,e.Eoper == OPoror,0);             // reg = 1
2665         regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
2666         *pretregs = retregs;
2667         if (e.Eoper == OPoror)
2668         {
2669             cdb.append(cnop3);
2670             genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
2671             cdb.append(cdb1);
2672             cdb.append(cnop2);
2673         }
2674         else
2675         {
2676             genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
2677             cdb.append(cnop3);
2678             cdb.append(cdb1);
2679             cdb.append(cnop2);
2680         }
2681         cgstate.stackclean--;
2682         return;
2683     }
2684 
2685     logexp(cdb,e2,1,FLcode,cnop1);
2686     andregcon(regconsave);
2687 
2688     // stack depth should not change when evaluating E2
2689     assert(stackpush == stackpushsave);
2690 
2691     assert(sz <= 4);                                         // result better be int
2692     regm_t retregs = *pretregs & (ALLREGS | mBP);
2693     if (!retregs)
2694         retregs = ALLREGS;                                   // if mPSW only
2695     CodeBuilder cdbcg;
2696     cdbcg.ctor();
2697     reg_t reg;
2698     allocreg(cdbcg,&retregs,&reg,TYint);                     // allocate reg for result
2699     code *cg = cdbcg.finish();
2700     for (code *c1 = cg; c1; c1 = code_next(c1))              // for each instruction
2701         cdb1.gen(c1);                                        // duplicate it
2702     CodeBuilder cdbcg2;
2703     cdbcg2.ctor();
2704     movregconst(cdbcg2,reg,0,*pretregs & mPSW);              // MOV reg,0
2705     regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
2706     genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);              // JMP cnop2
2707     movregconst(cdb1,reg,1,*pretregs & mPSW);                // reg = 1
2708     regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
2709     *pretregs = retregs;
2710     cdb.append(cnop3);
2711     cdb.append(cg);
2712     cdb.append(cdbcg2);
2713     cdb.append(cdb1);
2714     cdb.append(cnop2);
2715     cgstate.stackclean--;
2716     return;
2717 }
2718 
2719 
2720 /*********************
2721  * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
2722  */
2723 
2724 @trusted
2725 void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2726 {
2727     reg_t resreg;
2728     uint shiftcnt;
2729     regm_t retregs,rretregs;
2730 
2731     //printf("cdshift()\n");
2732     elem *e1 = e.EV.E1;
2733     if (*pretregs == 0)                   // if don't want result
2734     {
2735         codelem(cdb,e1,pretregs,false); // eval left leaf
2736         *pretregs = 0;                  // in case they got set
2737         codelem(cdb,e.EV.E2,pretregs,false);
2738         return;
2739     }
2740 
2741     tym_t tyml = tybasic(e1.Ety);
2742     int sz = _tysize[tyml];
2743     assert(!tyfloating(tyml));
2744     OPER oper = e.Eoper;
2745     uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;
2746 
2747     uint s1,s2;
2748     switch (oper)
2749     {
2750         case OPshl:
2751             s1 = 4;                     // SHL
2752             s2 = 2;                     // RCL
2753             break;
2754         case OPshr:
2755             s1 = 5;                     // SHR
2756             s2 = 3;                     // RCR
2757             break;
2758         case OPashr:
2759             s1 = 7;                     // SAR
2760             s2 = 3;                     // RCR
2761             break;
2762         case OProl:
2763             s1 = 0;                     // ROL
2764             break;
2765         case OPror:
2766             s1 = 1;                     // ROR
2767             break;
2768         default:
2769             assert(0);
2770     }
2771 
2772     reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
2773     elem *e2 = e.EV.E2;
2774     regm_t forccs = *pretregs & mPSW;            // if return result in CCs
2775     regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
2776     bool e2isconst = false;                    // assume for the moment
2777     uint isbyte = (sz == 1);
2778     switch (e2.Eoper)
2779     {
2780         case OPconst:
2781             e2isconst = true;               // e2 is a constant
2782             shiftcnt = e2.EV.Vint;         // get shift count
2783             if ((!I16 && sz <= REGSIZE) ||
2784                 shiftcnt <= 4 ||            // if sequence of shifts
2785                 (sz == 2 &&
2786                     (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
2787                 (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2788                )
2789             {
2790                 retregs = (forregs) ? forregs
2791                                     : ALLREGS;
2792                 if (isbyte)
2793                 {   retregs &= BYTEREGS;
2794                     if (!retregs)
2795                         retregs = BYTEREGS;
2796                 }
2797                 else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
2798                          !(retregs & mMSW))
2799                     retregs |= mMSW & ALLREGS;
2800                 if (s1 == 7)    // if arithmetic right shift
2801                 {
2802                     if (shiftcnt == 8)
2803                         retregs = mAX;
2804                     else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2805                         retregs = mDX|mAX;
2806                 }
2807 
2808                 if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
2809                     oper == OPshl &&
2810                     !e1.Ecount &&
2811                     (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
2812                      e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
2813                    )
2814                 {   // Handle (shtlng)s << 16
2815                     regm_t r = retregs & mMSW;
2816                     codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
2817                     regwithvalue(cdb,retregs & mLSW,0,resreg,0);
2818                     getregs(cdb,r);
2819                     retregs = r | mask(resreg);
2820                     if (forccs)
2821                     {   sreg = findreg(r);
2822                         gentstreg(cdb,sreg);
2823                         *pretregs &= ~mPSW;             // already set
2824                     }
2825                     freenode(e1);
2826                     freenode(e2);
2827                     break;
2828                 }
2829 
2830                 // See if we should use LEA reg,xxx instead of shift
2831                 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
2832                     (sz == REGSIZE || (I64 && sz == 4)) &&
2833                     oper == OPshl &&
2834                     e1.Eoper == OPvar &&
2835                     !(*pretregs & mPSW) &&
2836                     config.flags4 & CFG4speed
2837                    )
2838                 {
2839                     reg_t reg;
2840                     regm_t regm;
2841 
2842                     if (isregvar(e1,regm,reg) && !(regm & retregs))
2843                     {   code cs;
2844                         allocreg(cdb,&retregs,&resreg,e.Ety);
2845                         buildEA(&cs,-1,reg,1 << shiftcnt,0);
2846                         cs.Iop = LEA;
2847                         code_newreg(&cs,resreg);
2848                         cs.Iflags = 0;
2849                         if (I64 && sz == 8)
2850                             cs.Irex |= REX_W;
2851                         cdb.gen(&cs);             // LEA resreg,[reg * ss]
2852                         freenode(e1);
2853                         freenode(e2);
2854                         break;
2855                     }
2856                 }
2857 
2858                 codelem(cdb,e1,&retregs,false); // eval left leaf
2859                 //assert((retregs & regcon.mvar) == 0);
2860                 getregs(cdb,retregs);          // modify these regs
2861 
2862                 {
2863                     if (sz == 2 * REGSIZE)
2864                     {   resreg = findregmsw(retregs);
2865                         sreg = findreglsw(retregs);
2866                     }
2867                     else
2868                     {   resreg = findreg(retregs);
2869                         sreg = NOREG;              // an invalid value
2870                     }
2871                     if (config.target_cpu >= TARGET_80286 &&
2872                         sz <= REGSIZE)
2873                     {
2874                         // SHL resreg,shiftcnt
2875                         assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
2876                         cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
2877                         if (shiftcnt == 1)
2878                             cdb.last().Iop += 0x10;     // short form of shift
2879                         if (I64 && sz == 1 && resreg >= 4)
2880                             cdb.last().Irex |= REX;
2881                         // See if we need operand size prefix
2882                         if (!I16 && oper != OPshl && sz == 2)
2883                             cdb.last().Iflags |= CFopsize;
2884                         if (forccs)
2885                             cdb.last().Iflags |= CFpsw;         // need flags result
2886                     }
2887                     else if (shiftcnt == 8)
2888                     {   if (!(retregs & BYTEREGS) || resreg >= 4)
2889                         {
2890                             goto L1;
2891                         }
2892 
2893                         if (pass != BackendPass.final_ && (!forregs || forregs & (mSI | mDI)))
2894                         {
2895                             // e1 might get into SI or DI in a later pass,
2896                             // so don't put CX into a register
2897                             getregs(cdb,mCX);
2898                         }
2899 
2900                         assert(sz == 2);
2901                         switch (oper)
2902                         {
2903                             case OPshl:
2904                                 // MOV regH,regL        XOR regL,regL
2905                                 assert(resreg < 4 && !grex);
2906                                 genregs(cdb,0x8A,resreg+4,resreg);
2907                                 genregs(cdb,0x32,resreg,resreg);
2908                                 break;
2909 
2910                             case OPshr:
2911                             case OPashr:
2912                                 // MOV regL,regH
2913                                 genregs(cdb,0x8A,resreg,resreg+4);
2914                                 if (oper == OPashr)
2915                                     cdb.gen1(0x98);           // CBW
2916                                 else
2917                                     genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
2918                                 break;
2919 
2920                             case OPror:
2921                             case OProl:
2922                                 // XCHG regL,regH
2923                                 genregs(cdb,0x86,resreg+4,resreg);
2924                                 break;
2925 
2926                             default:
2927                                 assert(0);
2928                         }
2929                         if (forccs)
2930                             gentstreg(cdb,resreg);
2931                     }
2932                     else if (shiftcnt == REGSIZE * 8)   // it's an lword
2933                     {
2934                         if (oper == OPshl)
2935                             swap(&resreg, &sreg);
2936                         genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
2937                         if (oper == OPashr)
2938                             cdb.gen1(0x99);                       // CWD
2939                         else
2940                             movregconst(cdb,resreg,0,0);  // MOV resreg,0
2941                         if (forccs)
2942                         {
2943                             gentstreg(cdb,sreg);
2944                             *pretregs &= mBP | ALLREGS | mES;
2945                         }
2946                     }
2947                     else
2948                     {
2949                         if (oper == OPshl && sz == 2 * REGSIZE)
2950                             swap(&resreg, &sreg);
2951                         while (shiftcnt--)
2952                         {
2953                             cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
2954                             if (sz == 2 * REGSIZE)
2955                             {
2956                                 code_orflag(cdb.last(),CFpsw);
2957                                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2958                             }
2959                         }
2960                         if (forccs)
2961                             code_orflag(cdb.last(),CFpsw);
2962                     }
2963                     if (sz <= REGSIZE)
2964                         *pretregs &= mBP | ALLREGS;     // flags already set
2965                 }
2966                 freenode(e2);
2967                 break;
2968             }
2969             goto default;
2970 
2971         default:
2972             retregs = forregs & ~mCX;               // CX will be shift count
2973             if (sz <= REGSIZE)
2974             {
2975                 if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
2976                     retregs = ALLREGS & ~mCX;       // need something
2977                 else if (!retregs)
2978                     retregs = ALLREGS & ~mCX;       // need something
2979                 if (sz == 1)
2980                 {   retregs &= mAX|mBX|mDX;
2981                     if (!retregs)
2982                         retregs = mAX|mBX|mDX;
2983                 }
2984             }
2985             else
2986             {
2987                 if (!(retregs & mMSW))
2988                     retregs = ALLREGS & ~mCX;
2989             }
2990             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
2991 
2992             if (sz <= REGSIZE)
2993                 resreg = findreg(retregs);
2994             else
2995             {
2996                 resreg = findregmsw(retregs);
2997                 sreg = findreglsw(retregs);
2998             }
2999         L1:
3000             rretregs = mCX;                 // CX is shift count
3001             if (sz <= REGSIZE)
3002             {
3003                 scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
3004                 getregs(cdb,retregs);      // trash these regs
3005                 cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX
3006 
3007                 if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
3008                     cdb.last().Iflags |= CFopsize;
3009 
3010                 // Note that a shift by CL does not set the flags if
3011                 // CL == 0. If e2 is a constant, we know it isn't 0
3012                 // (it would have been optimized out).
3013                 if (e2isconst)
3014                     *pretregs &= mBP | ALLREGS; // flags already set with result
3015             }
3016             else if (sz == 2 * REGSIZE &&
3017                      config.target_cpu >= TARGET_80386)
3018             {
3019                 reg_t hreg = resreg;
3020                 reg_t lreg = sreg;
3021                 uint rex = I64 ? (REX_W << 16) : 0;
3022                 if (e2isconst)
3023                 {
3024                     getregs(cdb,retregs);
3025                     if (shiftcnt & (REGSIZE * 8))
3026                     {
3027                         if (oper == OPshr)
3028                         {   //      SHR hreg,shiftcnt
3029                             //      MOV lreg,hreg
3030                             //      XOR hreg,hreg
3031                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
3032                             genmovreg(cdb,lreg,hreg);
3033                             movregconst(cdb,hreg,0,0);
3034                         }
3035                         else if (oper == OPashr)
3036                         {   //      MOV     lreg,hreg
3037                             //      SAR     hreg,31
3038                             //      SHRD    lreg,hreg,shiftcnt
3039                             genmovreg(cdb,lreg,hreg);
3040                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
3041                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
3042                         }
3043                         else
3044                         {   //      SHL lreg,shiftcnt
3045                             //      MOV hreg,lreg
3046                             //      XOR lreg,lreg
3047                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
3048                             genmovreg(cdb,hreg,lreg);
3049                             movregconst(cdb,lreg,0,0);
3050                         }
3051                     }
3052                     else
3053                     {
3054                         if (oper == OPshr || oper == OPashr)
3055                         {   //      SHRD    lreg,hreg,shiftcnt
3056                             //      SHR/SAR hreg,shiftcnt
3057                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
3058                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
3059                         }
3060                         else
3061                         {   //      SHLD hreg,lreg,shiftcnt
3062                             //      SHL  lreg,shiftcnt
3063                             cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
3064                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
3065                         }
3066                     }
3067                     freenode(e2);
3068                 }
3069                 else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
3070                 {
3071                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3072                     getregs(cdb,retregs);          // modify these regs
3073                     if (oper == OPshl)
3074                     {
3075                         /*
3076                             SHLD    hreg,lreg,CL
3077                             SHL     lreg,CL
3078                          */
3079 
3080                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3081                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3082                     }
3083                     else
3084                     {
3085                         /*
3086                             SHRD    lreg,hreg,CL
3087                             SAR             hreg,CL
3088 
3089                             -- or --
3090 
3091                             SHRD    lreg,hreg,CL
3092                             SHR             hreg,CL
3093                          */
3094                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3095                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3096                     }
3097                 }
3098                 else
3099                 {   code* cl1,cl2;
3100 
3101                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3102                     getregs(cdb,retregs | mCX);     // modify these regs
3103                                                             // TEST CL,0x20
3104                     cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
3105                     cl1 = gennop(null);
3106                     CodeBuilder cdb1;
3107                     cdb1.ctor();
3108                     cdb1.append(cl1);
3109                     if (oper == OPshl)
3110                     {
3111                         /*  TEST    CL,20H
3112                             JNE     L1
3113                             SHLD    hreg,lreg,CL
3114                             SHL     lreg,CL
3115                             JMP     L2
3116                         L1: AND     CL,20H-1
3117                             SHL     lreg,CL
3118                             MOV     hreg,lreg
3119                             XOR     lreg,lreg
3120                         L2: NOP
3121                          */
3122 
3123                         if (REGSIZE == 2)
3124                             cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3125                         cdb1.gen2(0xD3,modregrm(3,4,lreg));
3126                         genmovreg(cdb1,hreg,lreg);
3127                         genregs(cdb1,0x31,lreg,lreg);
3128 
3129                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3130                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3131                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3132                     }
3133                     else
3134                     {   if (oper == OPashr)
3135                         {
3136                             /*  TEST        CL,20H
3137                                 JNE         L1
3138                                 SHRD        lreg,hreg,CL
3139                                 SAR         hreg,CL
3140                                 JMP         L2
3141                             L1: AND         CL,15
3142                                 MOV         lreg,hreg
3143                                 SAR         hreg,31
3144                                 SHRD        lreg,hreg,CL
3145                             L2: NOP
3146                              */
3147 
3148                             if (REGSIZE == 2)
3149                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3150                             genmovreg(cdb1,lreg,hreg);
3151                             cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
3152                             cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
3153                         }
3154                         else
3155                         {
3156                             /*  TEST        CL,20H
3157                                 JNE         L1
3158                                 SHRD        lreg,hreg,CL
3159                                 SHR         hreg,CL
3160                                 JMP         L2
3161                             L1: AND         CL,15
3162                                 SHR         hreg,CL
3163                                 MOV         lreg,hreg
3164                                 XOR         hreg,hreg
3165                             L2: NOP
3166                              */
3167 
3168                             if (REGSIZE == 2)
3169                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3170                             cdb1.gen2(0xD3,modregrm(3,5,hreg));
3171                             genmovreg(cdb1,lreg,hreg);
3172                             genregs(cdb1,0x31,hreg,hreg);
3173                         }
3174                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3175                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3176                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3177                     }
3178                     cl2 = gennop(null);
3179                     genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
3180                     cdb.append(cdb1);
3181                     cdb.append(cl2);
3182                 }
3183                 break;
3184             }
3185             else if (sz == 2 * REGSIZE)
3186             {
3187                 scodelem(cdb,e2,&rretregs,retregs,false);
3188                 getregs(cdb,retregs | mCX);
3189                 if (oper == OPshl)
3190                     swap(&resreg, &sreg);
3191                 if (!e2isconst)                   // if not sure shift count != 0
3192                     cdb.genc2(0xE3,0,6);          // JCXZ .+6
3193                 cdb.gen2(0xD1,modregrm(3,s1,resreg));
3194                 code_orflag(cdb.last(),CFtarg2);
3195                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
3196                 cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
3197                 regimmed_set(CX,0);         // note that now CX == 0
3198             }
3199             else
3200                 assert(0);
3201             break;
3202     }
3203     fixresult(cdb,e,retregs,pretregs);
3204 }
3205 
3206 
/***************************
 * Perform a 'star' reference (indirection): generate code that reads the
 * value addressed by elem e.
 *
 * Depending on what the caller asks for in *pretregs, the value is
 *  - handed to the x87 helpers cdind87()/cload87() (floating types with
 *    config.inline8087),
 *  - merely compared against 0 so that only the flags are produced
 *    (*pretregs == mPSW),
 *  - pushed onto the stack (8 byte value wanted only in mSTACK), or
 *  - loaded into general purpose or XMM registers.
 * The addressing mode is produced by getlvalue() and the final placement of
 * the result is delegated to fixresult().
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the indirection elem; e.EV.E1 must be non-null
 *      pretregs = mask of registers/flags the result is wanted in. If it is 0
 *                 nothing is loaded unless e.Ety has mTYvolatile set, in which
 *                 case *pretregs is overwritten with regmask(e.Ety, 0).
 */

@trusted
void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;
    uint nreg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    // Floating point values that must end up in ST0 / as x87 flags, and
    // complex values, are handled by the x87 code generator instead.
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            // A cfloat wanted in general registers on 64 bit targets is the
            // one complex case that falls through to the integer load below.
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    elem *e1 = e.EV.E1;
    assert(e1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
                    tym = TYfptr;
            break;

        default:
            break;
    }
    uint sz = _tysize[tym];             // size in bytes of the value to load
    uint isbyte = tybyte(tym) != 0;     // 1 for byte sized types, else 0

    code cs;

     getlvalue(cdb,&cs,e,RMload);          // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        if (e.Ety & mTYvolatile)               // do the load anyway
            *pretregs = regmask(e.Ety, 0);     // load into registers
        else
            return;
    }

    regm_t idxregs = idxregm(&cs);               // mask of index regs used

    // Only the flags are wanted: test the value without keeping it.
    if (*pretregs == mPSW)
    {
        if (!I16 && tym == TYfloat)
        {
            // Load the float into a scratch register (not one used by the
            // addressing mode) and shift it left by one with CFpsw set.
            // NOTE(review): presumably the shift drops the sign bit so that
            // -0.0 tests as zero - confirm.
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYfloat);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            cdb.gen(&cs);                       // MOV reg,lsw
            cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (sz <= REGSIZE)
        {
            // Fits in one register: compare memory directly against 0.
            // isbyte flips the low opcode bit (0x81 -> 0x80).
            cs.Iop = 0x81 ^ isbyte;
            cs.Irm |= modregrm(0,7,0);
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            cdb.gen(&cs);             // CMP [idx],0
        }
        else if (!I16 && sz == REGSIZE + 2)      // if far pointer
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = MOVZXw;
            cs.Irm |= modregrm(0,reg,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOVZX reg,msw
            goto L4;                  // then OR in the lsw to set the flags
        }
        else if (sz <= 2 * REGSIZE)
        {
            // Two register value: load the msw, then OR the lsw into it so
            // the flags reflect the whole value.
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOV reg,msw
            if (I32)
            {   if (tym == TYdouble || tym == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
            }
            else if (tym == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,reg));    // SHL reg,1
        L4:
            cs.Iop = 0x0B;
            getlvalue_lsw(&cs);
            cs.Iflags |= CFpsw;
            cdb.gen(&cs);                    // OR reg,lsw
        }
        else if (!I32 && sz == 8)
        {
            // No direct test available: load into registers and let
            // fixresult() (reached via L1) produce the flags.
            *pretregs |= DOUBLEREGS_16;     // fake it for now
            goto L1;
        }
        else
        {
            debug printf("%s\n", tym_str(tym));
            assert(0);
        }
    }
    else                                // else return result in reg
    {
    L1:
        retregs = *pretregs;
        // 8 byte value wanted on the stack only: push it straight from
        // memory, REGSIZE bytes at a time, highest offset first.
        if (sz == 8 &&
            (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {   int i;

            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            i = 8 - REGSIZE;
            do
            {
                cdb.gen(&cs);                         // PUSH EA+i
                cdb.genadjesp(REGSIZE);
                cs.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                i -= REGSIZE;
            }
            while (i >= 0);
            goto L3;
        }
        if (I16 && sz == 8)
            retregs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
                retregs = ALLREGS;*/

        {
            assert(!isbyte || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,tym); // alloc registers
        }
        if (retregs & XMMREGS)
        {
            assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
            cs.Iop = xmmload(tym);
            cs.Irex &= ~REX_W;
            code_newreg(&cs,reg - XMM0);
            checkSetVex(&cs,tym);
            cdb.gen(&cs);     // MOV reg,[idx]
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x8B;                                  // MOV
            // When optimizing for speed on PentiumPro or later, load values
            // of at most 2 bytes with a zero/sign extending move instead.
            if (sz <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                cs.Iop = tyuns(tym) ? MOVZXw : MOVSXw;      // MOVZX/MOVSX
                cs.Iflags &= ~CFopsize;
            }
            cs.Iop ^= isbyte;           // flip low opcode bit for byte operands
        L2:
            code_newreg(&cs,reg);
            cdb.gen(&cs);     // MOV reg,[idx]
            if (isbyte && reg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
        {
            cs.Iop = 0xC4;          // LES reg,[idx]
            goto L2;
        }
        else if (sz <= 2 * REGSIZE)
        {   uint lsreg;

            // Value occupies a register pair: two MOVs, ordered so that a
            // register still needed by the addressing mode is written last.
            cs.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((retregs & idxregs) == retregs)
                {
                    retregs = mMSW & allregs & ~idxregs;
                    if (!retregs)
                        retregs |= mCX;
                    retregs |= mLSW & ~idxregs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((retregs & ~regcon.mvar & mLSW) == 0)
                    {
                        regm_t x = idxregs & mLSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }
                    else if ((retregs & ~regcon.mvar & mMSW) == 0)
                    {
                        regm_t x = idxregs & mMSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }

                    allocreg(cdb,&retregs,&reg,tym);     // alloc registers
                    assert((retregs & idxregs) != retregs);
                }

                lsreg = findreglsw(retregs);
                if (mask(reg) & idxregs)                // reg is in addr mode
                {
                    // reg is part of the address: load the lsw first, and
                    // overwrite reg with the msw in the final MOV below.
                    code_newreg(&cs,lsreg);
                    cdb.gen(&cs);                 // MOV lsreg,lsw
                    if (sz == REGSIZE + 2)
                        cs.Iflags |= CFopsize;
                    lsreg = reg;
                    getlvalue_msw(&cs);                 // MOV reg,msw
                }
                else
                {
                    code_newreg(&cs,reg);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // MOV reg,msw
                    if (sz == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&cs);                 // MOV lsreg,lsw
                }
                NEWREG(cs.Irm,lsreg);
                cdb.gen(&cs);
            }
            else
            {
                // Index registers are always the lsw!
                cs.Irm |= modregrm(0,reg,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);     // MOV reg,msw
                lsreg = findreglsw(retregs);
                NEWREG(cs.Irm,lsreg);
                getlvalue_lsw(&cs);     // MOV lsreg,lsw
                cdb.gen(&cs);
            }
        }
        else if (I16 && sz == 8)
        {
            // 8 byte value on a 16 bit target: four word loads into
            // AX (EA+6), CX (EA+2), DX (EA) and BX (EA+4).
            assert(reg == AX);
            cs.Iop = 0x8B;
            cs.IEV1.Voffset += 6;
            cdb.gen(&cs);             // MOV AX,EA+6
            cs.Irm |= modregrm(0,CX,0);
            cs.IEV1.Voffset -= 4;
            cdb.gen(&cs);                    // MOV CX,EA+2
            NEWREG(cs.Irm,DX);
            cs.IEV1.Voffset -= 2;
            cdb.gen(&cs);                    // MOV DX,EA
            cs.IEV1.Voffset += 4;
            NEWREG(cs.Irm,BX);
            cdb.gen(&cs);                    // MOV BX,EA+4
        }
        else
            assert(0);
    L3:
        fixresult(cdb,e,retregs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}
3500 
3501 
3502 
/********************************
 * Build the instruction sequence that loads ES with the segment belonging
 * to a pointer of type ty, by pushing the matching segment register and
 * popping it into ES.
 *
 * Nothing is emitted for pointer types other than TYnptr, TYcptr and TYsptr
 * (far pointers among them), nor when the configuration flags tested below
 * make the reload unnecessary.
 *
 * Params:
 *      ty = pointer type; only tybasic(ty) is examined
 * Returns:
 *      null for EX_flat targets, otherwise the result of CodeBuilder.finish()
 *      for the (possibly empty) sequence
 */

@trusted
private code *cod2_setES(tym_t ty)
{
    if (config.exe & EX_flat)
        return null;

    enum noPush = -1;                   // marker: ES needs no reload
    int pushOp = noPush;                // opcode of the PUSH sreg to use

    // NOTE(review): CFG3eseqds presumably means "ES == DS" - confirm
    const bool esEqDs = (config.flags3 & CFG3eseqds) != 0;

    switch (tybasic(ty))
    {
        case TYnptr:
            if (!esEqDs)
                pushOp = 0x1E;          // PUSH DS
            break;

        case TYcptr:
            pushOp = 0x0E;              // PUSH CS
            break;

        case TYsptr:
            if ((config.wflags & WFssneds) || !esEqDs)
                pushOp = 0x16;          // PUSH SS
            break;

        default:
            break;
    }

    CodeBuilder cdb;
    cdb.ctor();
    if (pushOp != noPush)
    {
        // Must load ES
        getregs(cdb,mES);
        cdb.gen1(pushOp);
        cdb.gen1(0x07);                 // POP ES
    }
    return cdb.finish();
}
3545 
/********************************
 * Generate code for intrinsic strlen().
 *
 * The string pointer e.EV.E1 is evaluated into (ES:)DI, the string is
 * scanned for a 0 byte with REPNE SCASB, and the length is derived from the
 * count left in CX.
 *
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the strlen elem; e.EV.E1 is the string pointer
 *      pretregs = where the caller wants the result; mPSW is cleared from it
 *                 after the flag request has been attached to the final DEC
 */

@trusted
void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Sequence emitted, result ends up in CX:
        LES     DI,e1
        CLR     AX                      ;byte to search for
        MOV     CX,-1                   ;maximum count
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    const tym_t ptrTy = e.EV.E1.Ety;
    const ubyte rex = I64 ? REX_W : 0;

    // Pointer goes into DI; ES is requested too when tyreg() rejects the type
    regm_t ptrRegs = tyreg(ptrTy) ? mDI : (mDI | mES);
    codelem(cdb,e.EV.E1,&ptrRegs,false);

    // ES must hold the segment the scan reads from
    cdb.append(cod2_setES(ptrTy));

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);

    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB

    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rex);

    // The short form 0x48+reg is only used outside 64 bit mode
    if (!I64)
        cdb.gen1(0x48 + CX);                                // DEC CX
    else
        cdb.gen2(0xFF,(rex << 16) | modregrm(3,1,CX));      // DEC reg

    // If flags were requested, mark the DEC as the instruction providing them
    if (*pretregs & mPSW)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}
3593 
3594 
/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
 *
 * s1 is evaluated into (CX:)SI and s2 into (ES:)DI. The length of s2
 * including its terminator is found with REPNE SCASB, DI is rewound, and the
 * two strings are compared with REPE CMPSB. Unless only the flags are
 * wanted, AX is then turned into the integer result.
 *
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the strcmp elem; e.EV.E1 is s1, e.EV.E2 is s2
 *      pretregs = where the caller wants the result; mPSW is cleared from it
 *                 before fixresult() is called
 */

@trusted
void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Sequence emitted for the far pointer case:
        MOV     SI,s1                   ;offset of s1
        MOV     CX,s1+2                 ;segment of s1
        LES     DI,s2                   ;ES:DI = s2
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;byte to search for
        MOV     CX,-1                   ;maximum count
        REPNE   SCASB
        NOT     CX                      ;CX = length of s2
        SUB     DI,CX                   ;rewind DI to start of s2
        REPE    CMPSB                   ;compare the strings
        POP     DS
        JE      L1                      ;equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // s1 into SI, plus CX when tyreg() rejects its type
    regm_t s1regs = mSI;
    const tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        s1regs |= mCX;
    codelem(cdb,e.EV.E1,&s1regs,false);

    // s2 into DI, plus ES when tyreg() rejects its type; keep s1's registers
    regm_t s2regs = mDI;
    const tym_t ty2 = e.EV.E2.Ety;
    if (!tyreg(ty2))
        s2regs |= mES;
    scodelem(cdb,e.EV.E2,&s2regs,s1regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    const ubyte rex = I64 ? REX_W : 0;

    // Work out how DS has to be set up for s1
    enum noSeg = -1;
    int srcSeg = noSeg;         // segment register to copy into DS, if any
    bool segInCX = false;       // true: the segment of s1 is held in CX
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            break;

        case TYsptr:
            // if sptr can't use DS segment, go through SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            segInCX = true;
            break;

        default:
            assert(0);
    }

    const bool need_DS = segInCX || srcSeg != noSeg;
    if (need_DS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (segInCX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
        else
        {
            cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
    }

    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);

    cdb.gen1(0xF2);                     // REPNE
    cdb.gen1(0xAE);                     // SCASB

    genregs(cdb,0xF7,2,CX);             // NOT CX
    code_orrex(cdb.last(),rex);

    genregs(cdb,0x2B,DI,CX);            // SUB DI,CX
    code_orrex(cdb.last(),rex);

    cdb.gen1(0xF3);                     // REPE
    cdb.gen1(0xA6);                     // CMPSB

    if (need_DS)
        cdb.gen1(0x1F);                 // POP DS

    code *equalLabel = gennop(null);    // jump target for equal strings
    const bool wantValue = *pretregs != mPSW;
    if (wantValue)                      // flags alone are not enough
    {
        genjmp(cdb,JE,FLcode,cast(block *) equalLabel);     // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        code_orrex(cdb.last(),rex);
        cdb.genc2(0x81,(rex << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(equalLabel);
    fixresult(cdb,e,mAX,pretregs);
}
3702 
/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 *
 * e.EV.E1 must be an OPparam holding s1 and s2; e.EV.E2 is the byte count.
 * s1 is evaluated into (DX:)SI, s2 into (ES:)DI and n into CX, then the
 * buffers are compared with REPE CMPSB. Unless only the flags are wanted,
 * AX is turned into the integer result.
 *
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the memcmp elem
 *      pretregs = where the caller wants the result; mPSW is cleared from it
 *                 before fixresult() is called
 */

@trusted
void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Sequence emitted for the far pointer case:
        MOV     SI,s1                   ;offset of s1
        MOV     DX,s1+2                 ;segment of s1
        LES     DI,s2                   ;ES:DI = s2
        MOV     CX,n                    ;number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare the buffers
        POP     DS
        JE      L1                      ;equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    elem *params = e.EV.E1;
    assert(params.Eoper == OPparam);

    // s1 into SI, plus DX when tyreg() rejects its type
    regm_t s1regs = mSI;
    const tym_t ty1 = params.EV.E1.Ety;
    if (!tyreg(ty1))
        s1regs |= mDX;
    codelem(cdb,params.EV.E1,&s1regs,false);

    // s2 into DI, plus ES when tyreg() rejects its type
    regm_t s2regs = mDI;
    const tym_t ty2 = params.EV.E2.Ety;
    if (!tyreg(ty2))
        s2regs |= mES;
    scodelem(cdb,params.EV.E2,&s2regs,s1regs,false);
    freenode(params);

    // byte count into CX, keeping both pointers intact
    regm_t countRegs = mCX;
    scodelem(cdb,e.EV.E2,&countRegs,s2regs | s1regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(ty2));

    // Work out how DS has to be set up for s1
    enum noSeg = -1;
    int srcSeg = noSeg;         // segment register to copy into DS, if any
    bool segInDX = false;       // true: the segment of s1 is held in DX
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            break;

        case TYsptr:
            // if sptr can't use DS segment, go through SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            segInDX = true;
            break;

        default:
            assert(0);
    }

    const bool need_DS = segInDX || srcSeg != noSeg;
    if (need_DS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (segInDX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
        else
        {
            cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
    }

    // AX is always cleared with an explicit XOR whose flags are marked as used
    getregs(cdb,mAX);
    cdb.gen2(0x33,modregrm(3,AX,AX));               // XOR AX,AX
    code_orflag(cdb.last(), CFpsw);                 // keep flags

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB

    if (need_DS)
        cdb.gen1(0x1F);                             // POP DS

    const bool wantValue = *pretregs != mPSW;
    if (wantValue)                                  // flags alone are not enough
    {
        code *equalLabel = gennop(null);
        genjmp(cdb,JE,FLcode,cast(block *) equalLabel);     // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);  // SBB AX,-1
        cdb.append(equalLabel);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}
3820 
/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
 *
 * The source s2 (e.EV.E2) is evaluated first and measured with REPNE SCASB;
 * its start is then moved to SI. After the destination s1 (e.EV.E1) has been
 * evaluated into (ES:)DI, the bytes including the terminator are copied with
 * REP MOVSB. When a result is wanted, the destination offset is returned in
 * AX.
 *
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = the strcpy elem; e.EV.E1 is s1, e.EV.E2 is s2
 *      pretregs = where the caller wants the result; 0 if it is not needed
 */

@trusted
void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Sequence emitted for the far pointer case:
        LES     DI,s2                   ;ES:DI = s2
        CLR     AX                      ;byte to search for
        MOV     CX,-1                   ;maximum count
        REPNE   SCASB                   ;find end of s2
        NOT     CX                      ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;

    const tym_t ty2 = tybasic(e.EV.E2.Ety);
    const ubyte rex = I64 ? REX_W : 0;

    // s2 into DI, plus ES when tyreg() rejects its type
    regm_t ptrRegs = tyreg(ty2) ? mDI : (mDI | mES);
    codelem(cdb,e.EV.E2,&ptrRegs,false);

    // ES must hold the segment of s2 for the scan
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                    // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);            // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);

    cdb.gen1(0xF2);                             // REPNE
    cdb.gen1(0xAE);                             // SCASB

    genregs(cdb,0xF7,2,CX);                     // NOT CX
    code_orrex(cdb.last(),rex);

    genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
    code_orrex(cdb.last(),rex);

    genmovreg(cdb,SI,DI);                       // MOV SI,DI

    // Pick the segment register that has to become DS for reading s2
    enum noSeg = -1;
    int srcSeg = noSeg;
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            break;

        case TYsptr:
            // if sptr can't use DS segment, go through SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            srcSeg = SEG_ES;
            break;

        default:
            assert(0);
    }

    // Save DS and park the new segment on the stack; both are popped below
    const bool need_DS = srcSeg != noSeg;
    if (need_DS)
    {
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
        cdb.genadjesp(REGSIZE * 2);
    }

    // s1 into DI, plus ES when tyreg() rejects its type; CX and SI must survive
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    ptrRegs = tyreg(ty1) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E1,&ptrRegs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // ES must hold the segment of s1, unless both pointers are TYnptr,
    // in which case ES is already same as DS
    const bool bothNear = ty2 == TYnptr && ty1 == ty2;
    if (!bothNear)
        cdb.append(cod2_setES(ty1));

    if (need_DS)
        cdb.gen1(0x1F);                         // POP DS (source segment)
    if (*pretregs)
        genmovreg(cdb,AX,DI);                   // MOV AX,DI
    cdb.gen1(0xF3);                             // REP
    cdb.gen1(0xA4);                             // MOVSB

    if (need_DS)
    {
        cdb.gen1(0x1F);                         // POP DS (original value)
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}
3928 
3929 /*********************************
3930  * Generate code for memcpy(s1,s2,n) intrinsic.
3931  *  OPmemcpy
3932  *   /   \
3933  * s1   OPparam
3934  *       /   \
3935  *      s2    n
3936  */
3937 
3938 @trusted
3939 void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3940 {
3941     char need_DS;
3942     int segreg;
3943 
3944     /*
3945         MOV     SI,s2
3946         MOV     DX,s2+2
3947         MOV     CX,n
3948         LES     DI,s1
3949         PUSH    DS
3950         MOV     DS,DX
3951         MOV     AX,DI                   ;return value is s1
3952         REP     MOVSB
3953         POP     DS
3954     */
3955 
3956     elem *e2 = e.EV.E2;
3957     assert(e2.Eoper == OPparam);
3958 
3959     // Get s2 into DX:SI
3960     regm_t retregs2 = mSI;
3961     tym_t ty2 = e2.EV.E1.Ety;
3962     if (!tyreg(ty2))
3963         retregs2 |= mDX;
3964     codelem(cdb,e2.EV.E1,&retregs2,false);
3965 
3966     // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
3967     const zeroCheck = e2.EV.E2.Eoper != OPconst;
3968 
3969     // Get nbytes into CX
3970     regm_t retregs3 = mCX;
3971     scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
3972     freenode(e2);
3973 
3974     // Get s1 into ES:DI
3975     regm_t retregs1 = mDI;
3976     tym_t ty1 = e.EV.E1.Ety;
3977     if (!tyreg(ty1))
3978         retregs1 |= mES;
3979     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
3980 
3981     ubyte rex = I64 ? REX_W : 0;
3982 
3983     // Make sure ES contains proper segment value
3984     cdb.append(cod2_setES(ty1));
3985 
3986     // Load DS with right value
3987     switch (tybasic(ty2))
3988     {
3989         case TYnptr:
3990         case TYimmutPtr:
3991             need_DS = false;
3992             break;
3993 
3994         case TYsptr:
3995             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3996                 segreg = SEG_SS;
3997             else
3998                 segreg = SEG_DS;
3999             goto L1;
4000 
4001         case TYcptr:
4002             segreg = SEG_CS;
4003         L1:
4004             cdb.gen1(0x1E);                        // PUSH DS
4005             cdb.gen1(0x06 + (segreg << 3));        // PUSH segreg
4006             cdb.gen1(0x1F);                        // POP  DS
4007             need_DS = true;
4008             break;
4009 
4010         case TYfptr:
4011         case TYvptr:
4012         case TYhptr:
4013             cdb.gen1(0x1E);                        // PUSH DS
4014             cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
4015             need_DS = true;
4016             break;
4017 
4018         default:
4019             assert(0);
4020     }
4021 
4022     if (*pretregs)                              // if need return value
4023     {   getregs(cdb,mAX);
4024         genmovreg(cdb,AX,DI);
4025     }
4026 
4027     if (0 && I32 && config.flags4 & CFG4speed)
4028     {
4029         /* This is only faster if the memory is dword aligned, if not
4030          * it is significantly slower than just a rep movsb.
4031          */
4032         /*      mov     EDX,ECX
4033          *      shr     ECX,2
4034          *      jz      L1
4035          *      repe    movsd
4036          * L1:  nop
4037          *      and     EDX,3
4038          *      jz      L2
4039          *      mov     ECX,EDX
4040          *      repe    movsb
4041          * L2:  nop
4042          */
4043         getregs(cdb,mSI | mDI | mCX | mDX);
4044         genmovreg(cdb,DX,CX);                  // MOV EDX,ECX
4045         cdb.genc2(0xC1,modregrm(3,5,CX),2);                 // SHR ECX,2
4046         code *cx = gennop(null);
4047         genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
4048         cdb.gen1(0xF3);                                     // REPE
4049         cdb.gen1(0xA5);                                     // MOVSW
4050         cdb.append(cx);
4051         cdb.genc2(0x81, modregrm(3,4,DX),3);                // AND EDX,3
4052 
4053         code *cnop = gennop(null);
4054         genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
4055         genmovreg(cdb,CX,DX);                    // MOV ECX,EDX
4056         cdb.gen1(0xF3);                          // REPE
4057         cdb.gen1(0xA4);                          // MOVSB
4058         cdb.append(cnop);
4059     }
4060     else
4061     {
4062         getregs(cdb,mSI | mDI | mCX);
4063         code* cnop;
4064         if (zeroCheck)
4065         {
4066             cnop = gennop(null);
4067             gentstreg(cdb,CX);                           // TEST ECX,ECX
4068             if (I64)
4069                 code_orrex(cdb.last, REX_W);
4070             genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
4071         }
4072 
4073         if (I16 && config.flags4 & CFG4speed)          // if speed optimization
4074         {
4075             // Note this doesn't work if CX is 0
4076             cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
4077             cdb.gen1(0xF3);                              // REPE
4078             cdb.gen1(0xA5);                              // MOVSW
4079             cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
4080         }
4081         cdb.gen1(0xF3);                             // REPE
4082         cdb.gen1(0xA4);                             // MOVSB
4083         if (zeroCheck)
4084             cdb.append(cnop);
4085         if (need_DS)
4086             cdb.gen1(0x1F);                         // POP DS
4087     }
4088     fixresult(cdb,e,mES|mAX,pretregs);
4089 }
4090 
4091 
4092 /*********************************
4093  * Generate code for memset(s,value,numbytes) intrinsic.
4094  *      (s OPmemset (numbytes OPparam value))
4095  */
4096 
4097 @trusted
4098 void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4099 {
4100     regm_t retregs1;
4101     regm_t retregs3;
4102     reg_t reg;
4103     reg_t vreg;
4104     tym_t ty1;
4105     int segreg;
4106     targ_uns numbytes;
4107     uint m;
4108 
4109     //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
4110     elem *e2 = e.EV.E2;
4111     assert(e2.Eoper == OPparam);
4112 
4113     elem* evalue = e2.EV.E2;
4114     elem* enumbytes = e2.EV.E1;
4115 
4116     const sz = tysize(evalue.Ety);
4117     if (sz > 1)
4118     {
4119         cdmemsetn(cdb, e, pretregs);
4120         return;
4121     }
4122 
4123     const grex = I64 ? (REX_W << 16) : 0;
4124 
4125     bool valueIsConst = false;
4126     targ_size_t value;
4127     if (evalue.Eoper == OPconst)
4128     {
4129         value = el_tolong(evalue) & 0xFF;
4130         value |= value << 8;
4131         if (I32 || I64)
4132         {
4133             value |= value << 16;
4134             static if (value.sizeof == 8)
4135             if (I64)
4136                 value |= value << 32;
4137         }
4138         valueIsConst = true;
4139     }
4140     else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
4141     {
4142         value = 0;
4143         valueIsConst = true;
4144     }
4145     else
4146         value = 0xDEADBEEF;     // stop annoying false positives that value is not inited
4147 
4148     if (enumbytes.Eoper == OPconst)
4149     {
4150         numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);
4151     }
4152 
4153     // Get nbytes into CX
4154     regm_t retregs2 = 0;
4155     if (enumbytes.Eoper != OPconst)
4156     {
4157         retregs2 = mCX;
4158         codelem(cdb,enumbytes,&retregs2,false);
4159     }
4160 
4161     // Get value into AX
4162     retregs3 = mAX;
4163     if (valueIsConst)
4164     {
4165         reg_t r;
4166         regwithvalue(cdb, mAX, value, r, I64?64:0);
4167         freenode(evalue);
4168     }
4169     else
4170     {
4171         scodelem(cdb,evalue,&retregs3,retregs2,false);
4172 
4173         getregs(cdb,mAX);
4174         if (I16)
4175         {
4176             cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL
4177         }
4178         else if (I32)
4179         {
4180             genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
4181             cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101
4182         }
4183         else
4184         {
4185             genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
4186             regm_t regm = allregs & ~(mAX | retregs2);
4187             reg_t r;
4188             regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,r,64); // MOV reg,0x01010101_01010101
4189             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r));        // IMUL RAX,reg
4190         }
4191     }
4192     freenode(e2);
4193 
4194     // Get s into ES:DI
4195     retregs1 = mDI;
4196     ty1 = e.EV.E1.Ety;
4197     if (!tyreg(ty1))
4198         retregs1 |= mES;
4199     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
4200     reg = DI; //findreg(retregs1);
4201 
4202     // Make sure ES contains proper segment value
4203     cdb.append(cod2_setES(ty1));
4204 
4205     if (*pretregs)                              // if need return value
4206     {
4207         getregs(cdb,mBX);
4208         genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
4209     }
4210 
4211     if (enumbytes.Eoper == OPconst)
4212     {
4213         getregs(cdb,mDI);
4214         if (const numwords = numbytes / REGSIZE)
4215         {
4216             reg_t r;
4217             regwithvalue(cdb,mCX,numwords,r, I64 ? 64 : 0);
4218             getregs(cdb,mCX);
4219             cdb.gen1(0xF3);                     // REP
4220             cdb.gen1(STOS);                     // STOSW/D/Q
4221             if (I64)
4222                 code_orrex(cdb.last(), REX_W);
4223             regimmed_set(CX, 0);                // CX is now 0
4224         }
4225 
4226         auto remainder = numbytes & (REGSIZE - 1);
4227         if (I64 && remainder >= 4)
4228         {
4229             cdb.gen1(STOS);                     // STOSD
4230             remainder -= 4;
4231         }
4232         for (; remainder; --remainder)
4233             cdb.gen1(STOSB);                    // STOSB
4234         fixresult(cdb,e,mES|mBX,pretregs);
4235         return;
4236     }
4237 
4238     getregs(cdb,mDI | mCX);
4239     if (I16)
4240     {
4241         if (config.flags4 & CFG4speed)      // if speed optimization
4242         {
4243             cdb.gen2(0xD1,modregrm(3,5,CX));  // SHR CX,1
4244             cdb.gen1(0xF3);                   // REP
4245             cdb.gen1(STOS);                   // STOSW
4246             cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX
4247         }
4248         cdb.gen1(0xF3);                       // REP
4249         cdb.gen1(STOSB);                      // STOSB
4250         regimmed_set(CX, 0);                  // CX is now 0
4251         fixresult(cdb,e,mES|mBX,pretregs);
4252         return;
4253     }
4254 
4255     /*  MOV   sreg,ECX
4256         SHR   ECX,n
4257         REP
4258         STOSD/Q
4259 
4260         ADC   ECX,ECX
4261         REP
4262         STOSD
4263 
4264         MOV   ECX,sreg
4265         AND   ECX,3
4266         REP
4267         STOSB
4268      */
4269     regm_t regs = allregs & (*pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
4270     reg_t sreg;
4271     allocreg(cdb,&regs,&sreg,TYint);
4272     genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)
4273 
4274     const n = I64 ? 3 : 2;
4275     cdb.genc2(0xC1, grex | modregrm(3,5,CX), n);      // SHR ECX,n
4276 
4277     cdb.gen1(0xF3);                                   // REP
4278     cdb.gen1(STOS);                                   // STOSD/Q
4279     if (I64)
4280         code_orrex(cdb.last(), REX_W);
4281 
4282     if (I64)
4283     {
4284         cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
4285         cdb.gen1(0xF3);                               // REP
4286         cdb.gen1(STOS);                               // STOSD
4287     }
4288 
4289     genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
4290     cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
4291     cdb.gen1(0xF3);                                   // REP
4292     cdb.gen1(STOSB);                                  // STOSB
4293 
4294     regimmed_set(CX, 0);                    // CX is now 0
4295     fixresult(cdb,e,mES|mBX,pretregs);
4296 }
4297 
4298 /***********************************************
4299  * Do memset for values larger than a byte.
4300  * Has many similarities to cod4.cdeq().
4301  * Doesn't work for 16 bit code.
4302  */
4303 @trusted
4304 private void cdmemsetn(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4305 {
4306     //printf("cdmemsetn(*pretregs = %s)\n", regm_str(*pretregs));
4307     elem *e2 = e.EV.E2;
4308     assert(e2.Eoper == OPparam);
4309 
4310     elem* evalue = e2.EV.E2;
4311     elem* enelems = e2.EV.E1;
4312 
4313     tym_t tymv = tybasic(evalue.Ety);
4314     const sz = tysize(evalue.Ety);
4315     assert(cast(int)sz > 1);
4316 
4317     if (tyxmmreg(tymv) && config.fpxmmregs)
4318         assert(0);      // fix later
4319     if (tyfloating(tymv) && config.inline8087)
4320         assert(0);      // fix later
4321 
4322     const grex = I64 ? (REX_W << 16) : 0;
4323 
4324     // get the count of elems into CX
4325     regm_t mregcx = mCX;
4326     codelem(cdb,enelems,&mregcx,false);
4327 
4328     // Get value into AX
4329     regm_t retregs3 = allregs & ~mregcx;
4330     if (sz == 2 * REGSIZE)
4331         retregs3 &= ~(mBP | IDXREGS);  // BP cannot be used for register pair,
4332                                        // IDXREGS could deplete index regs - see sdtor.d test14815()
4333     scodelem(cdb,evalue,&retregs3,mregcx,false);
4334 
4335     /* Necessary because if evalue calls a function, and that function never returns,
4336      * it doesn't affect registers. Which means those registers can be used for enregistering
4337      * variables, and next pass fails because it can't use those registers, and so cannot
4338      * allocate registers for retregs3. See ice11596.d
4339      */
4340     useregs(retregs3);
4341 
4342     reg_t valreg = findreg(retregs3);
4343     reg_t valreghi;
4344     if (sz == 2 * REGSIZE)
4345     {
4346         valreg = findreglsw(retregs3);
4347         valreghi = findregmsw(retregs3);
4348     }
4349 
4350     freenode(e2);
4351 
4352     // Get s into ES:DI
4353     regm_t mregidx = IDXREGS & ~(mregcx | retregs3);
4354     assert(mregidx);
4355     tym_t ty1 = tybasic(e.EV.E1.Ety);
4356     if (!tyreg(ty1))
4357         mregidx |= mES;
4358     scodelem(cdb,e.EV.E1,&mregidx,mregcx | retregs3,false);
4359     reg_t idxreg = findreg(mregidx);
4360 
4361     // Make sure ES contains proper segment value
4362     cdb.append(cod2_setES(ty1));
4363 
4364     regm_t mregbx = 0;
4365     if (*pretregs)                              // if need return value
4366     {
4367         mregbx = *pretregs & ~(mregidx | mregcx | retregs3);
4368         if (!mregbx)
4369             mregbx = allregs & ~(mregidx | mregcx | retregs3);
4370         reg_t regbx;
4371         allocreg(cdb, &mregbx, &regbx, TYnptr);
4372         getregs(cdb, mregbx);
4373         genmovreg(cdb,regbx,idxreg);            // MOV BX,DI
4374     }
4375 
4376     getregs(cdb,mask(idxreg) | mCX);            // modify DI and CX
4377 
4378     /* Generate:
4379      *  JCXZ L1
4380      * L2:
4381      *  MOV [idxreg],AX
4382      *  ADD idxreg,sz
4383      *  LOOP L2
4384      * L1:
4385      *  NOP
4386      */
4387     code* c1 = gennop(null);
4388     genjmp(cdb, JCXZ, FLcode, cast(block *)c1);
4389     code cs;
4390     buildEA(&cs,idxreg,-1,1,0);
4391     cs.Iop = 0x89;
4392     if (!I16 && sz == 2)
4393         cs.Iflags |= CFopsize;
4394     if (I64 && sz == 8)
4395         cs.Irex |= REX_W;
4396     code_newreg(&cs, valreg);
4397     cdb.gen(&cs);                                       // MOV [idxreg],AX
4398     code* c2 = cdb.last();
4399     if (sz == REGSIZE * 2)
4400     {
4401         cs.IEV1.Vuns = REGSIZE;
4402         code_newreg(&cs, valreghi);
4403         cdb.gen(&cs);                                   // MOV REGSIZE[idxreg],DX
4404     }
4405     cdb.genc2(0x81, grex | modregrmx(3,0,idxreg), sz);  // ADD idxreg,sz
4406     genjmp(cdb, LOOP, FLcode, cast(block *)c2);         // LOOP L2
4407     cdb.append(c1);
4408 
4409     regimmed_set(CX, 0);                  // CX is now 0
4410 
4411     fixresult(cdb,e,mregbx,pretregs);
4412 }
4413 
4414 /**********************
4415  * Do structure assignments.
4416  * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
4417  * Mebbe call cdstreq() for double assignments???
4418  */
4419 
4420 @trusted
4421 void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4422 {
4423     char need_DS = false;
4424     elem *e1 = e.EV.E1;
4425     elem *e2 = e.EV.E2;
4426     int segreg;
4427     uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
4428     ubyte rex = I64 ? REX_W : 0;
4429 
4430     //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4431 
4432     // First, load pointer to rvalue into SI
4433     regm_t srcregs = mSI;                      // source is DS:SI
4434     docommas(cdb,e2);
4435     if (e2.Eoper == OPind)             // if (.. = *p)
4436     {   elem *e21 = e2.EV.E1;
4437 
4438         segreg = SEG_DS;
4439         switch (tybasic(e21.Ety))
4440         {
4441             case TYsptr:
4442                 if (config.wflags & WFssneds)   // if sptr can't use DS segment
4443                     segreg = SEG_SS;
4444                 break;
4445             case TYcptr:
4446                 if (!(config.exe & EX_flat))
4447                     segreg = SEG_CS;
4448                 break;
4449             case TYfptr:
4450             case TYvptr:
4451             case TYhptr:
4452                 srcregs |= mCX;         // get segment also
4453                 need_DS = true;
4454                 break;
4455 
4456             default:
4457                 break;
4458         }
4459         codelem(cdb,e21,&srcregs,false);
4460         freenode(e2);
4461         if (segreg != SEG_DS)           // if not DS
4462         {
4463             getregs(cdb,mCX);
4464             cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
4465             need_DS = true;
4466         }
4467     }
4468     else if (e2.Eoper == OPvar)
4469     {
4470         if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment
4471         {   srcregs |= mCX;             // get segment also
4472             need_DS = true;
4473             cdrelconst(cdb,e2,&srcregs);
4474         }
4475         else
4476         {
4477             segreg = segfl[el_fl(e2)];
4478             if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
4479                 segreg == SEG_CS)               // if source is in CS
4480             {
4481                 need_DS = true;         // we need to reload DS
4482                 // Load CX with segment
4483                 srcregs |= mCX;
4484                 getregs(cdb,mCX);
4485                 cdb.gen2(0x8C,                // MOV CX,[SS|CS]
4486                     modregrm(3,segreg,CX));
4487             }
4488             cdrelconst(cdb,e2,&srcregs);
4489         }
4490         freenode(e2);
4491     }
4492     else
4493     {
4494         if (!(config.exe & EX_flat))
4495         {   need_DS = true;
4496             srcregs |= mCX;
4497         }
4498         codelem(cdb,e2,&srcregs,false);
4499     }
4500 
4501     // now get pointer to lvalue (destination) in ES:DI
4502     regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
4503     if (e1.Eoper == OPind)               // if (*p = ..)
4504     {
4505         if (tyreg(e1.EV.E1.Ety))
4506             dstregs = mDI;
4507         cdb.append(cod2_setES(e1.EV.E1.Ety));
4508         scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
4509     }
4510     else
4511         cdrelconst(cdb,e1,&dstregs);
4512     freenode(e1);
4513 
4514     getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
4515     if (need_DS)
4516     {     assert(!(config.exe & EX_flat));
4517         cdb.gen1(0x1E);                     // PUSH DS
4518         cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));    // MOV DS,CX
4519     }
4520     if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
4521     {
4522         while (numbytes >= REGSIZE)
4523         {
4524             cdb.gen1(0xA5);         // MOVSW
4525             code_orrex(cdb.last(), rex);
4526             numbytes -= REGSIZE;
4527         }
4528         //if (numbytes)
4529         //    printf("cdstreq numbytes %d\n",numbytes);
4530         if (I64 && numbytes >= 4)
4531         {
4532             cdb.gen1(0xA5);         // MOVSD
4533             numbytes -= 4;
4534         }
4535         while (numbytes--)
4536             cdb.gen1(0xA4);         // MOVSB
4537     }
4538     else
4539     {
4540 static if (1)
4541 {
4542         uint remainder = numbytes & (REGSIZE - 1);
4543         numbytes /= REGSIZE;            // number of words
4544         getregs_imm(cdb,mCX);
4545         movregconst(cdb,CX,numbytes,0);   // # of bytes/words
4546         cdb.gen1(0xF3);                 // REP
4547         if (REGSIZE == 8)
4548             cdb.gen1(REX | REX_W);
4549         cdb.gen1(0xA5);                 // REP MOVSD
4550         regimmed_set(CX,0);             // note that CX == 0
4551         if (I64 && remainder >= 4)
4552         {
4553             cdb.gen1(0xA5);         // MOVSD
4554             remainder -= 4;
4555         }
4556         for (; remainder; remainder--)
4557         {
4558             cdb.gen1(0xA4);             // MOVSB
4559         }
4560 }
4561 else
4562 {
4563         uint movs;
4564         if (numbytes & (REGSIZE - 1))   // if odd
4565             movs = 0xA4;                // MOVSB
4566         else
4567         {
4568             movs = 0xA5;                // MOVSW
4569             numbytes /= REGSIZE;        // # of words
4570         }
4571         getregs_imm(cdb,mCX);
4572         movregconst(cdb,CX,numbytes,0);   // # of bytes/words
4573         cdb.gen1(0xF3);                 // REP
4574         cdb.gen1(movs);
4575         regimmed_set(CX,0);             // note that CX == 0
4576 }
4577     }
4578     if (need_DS)
4579         cdb.gen1(0x1F);                 // POP  DS
4580     assert(!(*pretregs & mPSW));
4581     if (*pretregs)
4582     {   // ES:DI points past what we want
4583 
4584         cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
4585 
4586         const tym = tybasic(e.Ety);
4587         if (tym == TYucent && I64)
4588         {
4589             /* https://issues.dlang.org/show_bug.cgi?id=22175
4590              * The trouble happens when the struct size does not fit exactly into
4591              * 2 registers. Then the type of e becomes a TYucent, not a TYstruct,
4592              * and we need to dereference DI to get the ucent
4593              */
4594 
4595             // dereference DI
4596             code cs;
4597             cs.Iop = 0x8B;
4598             regm_t retregs = *pretregs;
4599             reg_t reg;
4600             allocreg(cdb,&retregs,&reg,tym);
4601 
4602             reg_t msreg = findregmsw(retregs);
4603             buildEA(&cs,DI,-1,1,REGSIZE);
4604             code_newreg(&cs,msreg);
4605             cs.Irex |= REX_W;
4606             cdb.gen(&cs);       // MOV msreg,REGSIZE[DI]        // msreg is never DI
4607 
4608             reg_t lsreg = findreglsw(retregs);
4609             buildEA(&cs,DI,-1,1,0);
4610             code_newreg(&cs,lsreg);
4611             cs.Irex |= REX_W;
4612             cdb.gen(&cs);       // MOV lsreg,[DI];
4613             fixresult(cdb,e,retregs,pretregs);
4614             return;
4615         }
4616 
4617         regm_t retregs = mDI;
4618         if (*pretregs & mMSW && !(config.exe & EX_flat))
4619             retregs |= mES;
4620         fixresult(cdb,e,retregs,pretregs);
4621     }
4622 }
4623 
4624 
4625 /**********************
4626  * Get the address of.
4627  * Is also called by cdstreq() to set up pointer to a structure.
4628  */
4629 
4630 @trusted
4631 void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4632 {
4633     //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4634 
4635     /* The following should not happen, but cgelem.c is a little stupid.
4636      * Assertion can be tripped by func("string" == 0); and similar
4637      * things. Need to add goals to optelem() to fix this completely.
4638      */
4639     //assert((*pretregs & mPSW) == 0);
4640     if (*pretregs & mPSW)
4641     {
4642         *pretregs &= ~mPSW;
4643         gentstreg(cdb,SP);            // SP is never 0
4644         if (I64)
4645             code_orrex(cdb.last(), REX_W);
4646     }
4647     if (!*pretregs)
4648         return;
4649 
4650     assert(e);
4651     tym_t tym = tybasic(e.Ety);
4652     switch (tym)
4653     {
4654         case TYstruct:
4655         case TYarray:
4656         case TYldouble:
4657         case TYildouble:
4658         case TYcldouble:
4659             tym = TYnptr;               // don't confuse allocreg()
4660             if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
4661             {
4662                 tym = TYfptr;
4663             }
4664             break;
4665 
4666         case TYifunc:
4667             tym = TYfptr;
4668             break;
4669 
4670         default:
4671             if (tyfunc(tym))
4672                 tym =
4673                     tyfarfunc(tym) ? TYfptr :
4674                     TYnptr;
4675             break;
4676     }
4677     //assert(tym & typtr);              // don't fail on (int)&a
4678 
4679     SC sclass;
4680     reg_t mreg,            // segment of the address (TYfptrs only)
4681           lreg;            // offset of the address
4682 
4683     allocreg(cdb,pretregs,&lreg,tym);
4684     if (_tysize[tym] > REGSIZE)            // fptr could've been cast to long
4685     {
4686         if (*pretregs & mES)
4687         {
4688             /* Do not allocate CX or SI here, as cdstreq() needs
4689              * them preserved. cdstreq() should use scodelem()
4690              */
4691             mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg));
4692         }
4693         else
4694         {
4695             mreg = lreg;
4696             lreg = findreglsw(*pretregs);
4697         }
4698 
4699         /* if (get segment of function that isn't necessarily in the
4700          * current segment (i.e. CS doesn't have the right value in it)
4701          */
4702         Symbol *s = e.EV.Vsym;
4703         if (s.Sfl == FLdatseg)
4704         {   assert(0);
4705         }
4706         sclass = s.Sclass;
4707         const ety = tybasic(s.ty());
4708         if ((tyfarfunc(ety) || ety == TYifunc) &&
4709             (sclass == SC.extern_ || ClassInline(sclass) || config.wflags & WFthunk)
4710             || s.Sfl == FLfardata
4711             || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SC.comdat))
4712            )
4713         {   // MOV mreg,seg of symbol
4714             cdb.gencs(0xB8 + mreg,0,FLextern,s);
4715             cdb.last().Iflags = CFseg;
4716         }
4717         else
4718         {
4719             const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
4720             cdb.gen2(0x8C,            // MOV mreg,SEG REGISTER
4721                 modregrm(3,segfl[fl],mreg));
4722         }
4723         if (*pretregs & mES)
4724             cdb.gen2(0x8E,modregrm(3,0,mreg));        // MOV ES,mreg
4725     }
4726     getoffset(cdb,e,lreg);
4727 }
4728 
4729 /*********************************
4730  * Load the offset portion of the address represented by e into
4731  * reg.
4732  */
4733 
4734 @trusted
4735 void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
4736 {
4737     //printf("getoffset(e = %p, reg = %d)\n", e, reg);
4738     code cs = void;
4739     cs.Iflags = 0;
4740     ubyte rex = 0;
4741     cs.Irex = rex;
4742     assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
4743     auto fl = el_fl(e);
4744     switch (fl)
4745     {
4746         case FLdatseg:
4747             cs.IEV2.Vpointer = e.EV.Vpointer;
4748             goto L3;
4749 
4750         case FLfardata:
4751             goto L4;
4752 
4753         case FLtlsdata:
4754         if (config.exe & EX_posix)
4755         {
4756           Lposix:
4757             if (config.flags3 & CFG3pic)
4758             {
4759                 if (I64)
4760                 {
4761                     /* Generate:
4762                      *   LEA DI,s@TLSGD[RIP]
4763                      */
4764                     //assert(reg == DI);
4765                     code css = void;
4766                     css.Irex = REX | REX_W;
4767                     css.Iop = LEA;
4768                     css.Irm = modregrm(0,reg,5);
4769                     if (reg & 8)
4770                         css.Irex |= REX_R;
4771                     css.Iflags = CFopsize;
4772                     css.IFL1 = cast(ubyte)fl;
4773                     css.IEV1.Vsym = e.EV.Vsym;
4774                     css.IEV1.Voffset = e.EV.Voffset;
4775                     cdb.gen(&css);
4776                 }
4777                 else
4778                 {
4779                     /* Generate:
4780                      *   LEA EAX,s@TLSGD[1*EBX+0]
4781                      */
4782                     assert(reg == AX);
4783                     load_localgot(cdb);
4784                     code css = void;
4785                     css.Iflags = 0;
4786                     css.Iop = LEA;             // LEA
4787                     css.Irex = 0;
4788                     css.Irm = modregrm(0,AX,4);
4789                     css.Isib = modregrm(0,BX,5);
4790                     css.IFL1 = cast(ubyte)fl;
4791                     css.IEV1.Vsym = e.EV.Vsym;
4792                     css.IEV1.Voffset = e.EV.Voffset;
4793                     cdb.gen(&css);
4794                 }
4795                 return;
4796             }
4797             /* Generate:
4798              *      MOV reg,GS:[00000000]
4799              *      ADD reg, offset s@TLS_LE
4800              * for locals, and for globals:
4801              *      MOV reg,GS:[00000000]
4802              *      ADD reg, s@TLS_IE
4803              * note different fixup
4804              */
4805             int stack = 0;
4806             if (reg == STACK)
4807             {   regm_t retregs = ALLREGS;
4808 
4809                 reg_t regx;
4810                 allocreg(cdb,&retregs,&regx,TYoffset);
4811                 reg = findreg(retregs);
4812                 stack = 1;
4813             }
4814 
4815             code css = void;
4816             css.Irex = rex;
4817             css.Iop = 0x8B;
4818             css.Irm = modregrm(0, 0, BPRM);
4819             code_newreg(&css, reg);
4820             css.Iflags = CFgs;
4821             css.IFL1 = FLconst;
4822             css.IEV1.Vuns = 0;
4823             cdb.gen(&css);               // MOV reg,GS:[00000000]
4824 
4825             if (e.EV.Vsym.Sclass == SC.static_ || e.EV.Vsym.Sclass == SC.locstat)
4826             {   // ADD reg, offset s
4827                 cs.Irex = rex;
4828                 cs.Iop = 0x81;
4829                 cs.Irm = modregrm(3,0,reg & 7);
4830                 if (reg & 8)
4831                     cs.Irex |= REX_B;
4832                 cs.Iflags = CFoff;
4833                 cs.IFL2 = cast(ubyte)fl;
4834                 cs.IEV2.Vsym = e.EV.Vsym;
4835                 cs.IEV2.Voffset = e.EV.Voffset;
4836             }
4837             else
4838             {   // ADD reg, s
4839                 cs.Irex = rex;
4840                 cs.Iop = 0x03;
4841                 cs.Irm = modregrm(0,0,BPRM);
4842                 code_newreg(&cs, reg);
4843                 cs.Iflags = CFoff;
4844                 cs.IFL1 = cast(ubyte)fl;
4845                 cs.IEV1.Vsym = e.EV.Vsym;
4846                 cs.IEV1.Voffset = e.EV.Voffset;
4847             }
4848             cdb.gen(&cs);                // ADD reg, xxxx
4849 
4850             if (stack)
4851             {
4852                 cdb.gen1(0x50 + (reg & 7));      // PUSH reg
4853                 if (reg & 8)
4854                     code_orrex(cdb.last(), REX_B);
4855                 cdb.genadjesp(REGSIZE);
4856                 stackchanged = 1;
4857             }
4858             break;
4859         }
4860         else if (config.exe & EX_windos)
4861         {
4862             if (I64)
4863             {
4864             Lwin64:
4865                 assert(reg != STACK);
4866                 cs.IEV2.Vsym = e.EV.Vsym;
4867                 cs.IEV2.Voffset = e.EV.Voffset;
4868                 cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
4869                 if (reg & 8)
4870                     cs.Irex |= REX_B;
4871                 cs.Iflags = CFoff;              // want offset only
4872                 cs.IFL2 = cast(ubyte)fl;
4873                 cdb.gen(&cs);
4874                 break;
4875             }
4876             goto L4;
4877         }
4878         else
4879         {
4880             goto L4;
4881         }
4882 
4883         case FLfunc:
4884             fl = FLextern;                  /* don't want PC relative addresses */
4885             goto L4;
4886 
4887         case FLextern:
4888             if (config.exe & EX_posix && e.EV.Vsym.ty() & mTYthread)
4889                 goto Lposix;
4890             if (config.exe & EX_WIN64 && e.EV.Vsym.ty() & mTYthread)
4891                 goto Lwin64;
4892             goto L4;
4893 
4894         case FLdata:
4895         case FLudata:
4896         case FLgot:
4897         case FLgotoff:
4898         case FLcsdata:
4899         L4:
4900             cs.IEV2.Vsym = e.EV.Vsym;
4901             cs.IEV2.Voffset = e.EV.Voffset;
4902         L3:
4903             if (reg == STACK)
4904             {   stackchanged = 1;
4905                 cs.Iop = 0x68;              /* PUSH immed16                 */
4906                 cdb.genadjesp(REGSIZE);
4907             }
4908             else
4909             {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
4910                 if (reg & 8)
4911                     cs.Irex |= REX_B;
4912                 if (I64)
4913                 {   cs.Irex |= REX_W;
4914                     if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
4915                     {   // LEA reg,immed32[RIP]
4916                         cs.Iop = LEA;
4917                         cs.Irm = modregrm(0,reg & 7,5);
4918                         if (reg & 8)
4919                             cs.Irex = (cs.Irex & ~REX_B) | REX_R;
4920                         cs.IFL1 = cast(ubyte)fl;
4921                         cs.IEV1.Vsym = cs.IEV2.Vsym;
4922                         cs.IEV1.Voffset = cs.IEV2.Voffset;
4923                     }
4924                 }
4925             }
4926             cs.Iflags = CFoff;              /* want offset only             */
4927             cs.IFL2 = cast(ubyte)fl;
4928             cdb.gen(&cs);
4929             break;
4930 
4931         case FLreg:
4932             /* Allow this since the tree optimizer puts & in front of       */
4933             /* register doubles.                                            */
4934             goto L2;
4935         case FLauto:
4936         case FLfast:
4937         case FLbprel:
4938         case FLfltreg:
4939             reflocal = true;
4940             goto L2;
4941         case FLpara:
4942             refparam = true;
4943         L2:
4944             if (reg == STACK)
4945             {   regm_t retregs = ALLREGS;
4946 
4947                 reg_t regx;
4948                 allocreg(cdb,&retregs,&regx,TYoffset);
4949                 reg = findreg(retregs);
4950                 loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
4951                 if (I64)
4952                     code_orrex(cdb.last(), REX_W);
4953                 cdb.gen1(0x50 + (reg & 7));               // PUSH reg
4954                 if (reg & 8)
4955                     code_orrex(cdb.last(), REX_B);
4956                 cdb.genadjesp(REGSIZE);
4957                 stackchanged = 1;
4958             }
4959             else
4960             {
4961                 loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
4962                 if (I64)
4963                     code_orrex(cdb.last(), REX_W);
4964             }
4965             break;
4966 
4967         default:
4968             debug
4969             {
4970                 elem_print(e);
4971                 WRFL(fl);
4972             }
4973             assert(0);
4974     }
4975 }
4976 
4977 
/******************
 * Code generator shared by the unary operators
 * OPneg, OPsqrt, OPsin, OPcos, OPrint
 *
 * Floating point operands are forwarded to the complex (neg_complex87),
 * XMM (xmmneg) or x87 (neg87) generators when their conditions hold;
 * otherwise the sign bit is toggled with an XOR on a general register.
 * Integer operands are negated with NEG, or NEG/NEG/SBB for a register pair.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = operator elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted. If 0, only the
 *                 operand is evaluated.
 */

@trusted
void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    elem* e1 = e.EV.E1;

    // No result wanted: just evaluate the operand
    if (!*pretregs)
    {
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const tyml = tybasic(e1.Ety);
    const sz = _tysize[tyml];

    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
        }
        else if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
        {
            xmmneg(cdb,e,pretregs);
        }
        else if (config.inline8087 &&
                 (!(*pretregs & (ALLREGS | mBP)) || e.Eoper == OPsqrt || I64))
        {
            neg87(cdb,e,pretregs);
        }
        else
        {
            // Value lives in general registers: flip the sign bit with XOR
            regm_t fregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
            codelem(cdb,e1,&fregs,false);
            getregs(cdb,fregs);
            if (I32)
            {
                const signreg = (sz == 8) ? findregmsw(fregs) : findreg(fregs);
                cdb.genc2(0x81,modregrm(3,6,signreg),0x80000000); // XOR EDX,sign bit
            }
            else
            {
                const signreg = (sz == 8) ? AX : findregmsw(fregs);
                cdb.genc2(0x81,modregrm(3,6,signreg),0x8000);     // XOR AX,0x8000
            }
            fixresult(cdb,e,fregs,pretregs);
        }
        return;
    }

    // Integer operand
    const uint byteop = sz == 1;
    const candidates = byteop ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & candidates;
    if (!retregs)
        retregs = candidates;
    codelem(cdb,e1,&retregs,false);
    getregs(cdb,retregs);                // NEG overwrites the operand registers

    if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);

        uint rex = 0;
        if (I64 && sz == 8)
            rex = REX_W;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;

        const rm = (rex << 16) | modregrmx(3,3,reg);
        cdb.gen2(0xF7 ^ byteop,rm);             // NEG reg
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else
    {
        if (sz != 2 * REGSIZE)
            assert(0);

        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);           // the SBB below consumes this NEG's carry
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    fixresult(cdb,e,retregs,pretregs);
}
5060 
5061 
/******************
 * Absolute value operator
 *
 * Floating point operands go to xmmabs() or neg87() when those apply,
 * otherwise the sign bit is cleared with an AND on a general register.
 * Integer operands that fit a register use the branch-free
 * sign-mask / XOR / SUB sequence; register pairs use a test of the
 * most significant register followed by a conditional NEG/NEG/SBB.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = operator elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted. If 0, only the
 *                 operand is evaluated.
 */

@trusted
void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    if (*pretregs == 0)
    {
        // No result wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;
    if (tyfloating(tyml))
    {
        if (tyxmmreg(tyml) && *pretregs & XMMREGS)
        {
            xmmabs(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        // Value lives in general registers: clear the sign bit with AND
        regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);                 // byte operands are not handled here
    // The CWD based sequence needs the operand in AX; the MOV/SAR based one
    // (register-sized operand, not 16 bit code) can use any register.
    regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
    if (!I16 && sz == REGSIZE)
        possregs = allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;
        reg_t r;

        if (!I16 && sz == REGSIZE)
        {
            reg = findreg(retregs);
            r = allocScratchReg(cdb, allregs & ~retregs);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                     // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);      // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        if (*pretregs & mPSW)
            cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;                     // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        // The SBB below consumes the carry produced by NEG lsreg, so mark the
        // NEG as having a needed flag result, the same way cdneg() does for
        // its identical NEG/NEG/SBB sequence.
        code_orflag(cdb.last(), CFpsw);
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
        cdb.append(cnop);
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}
5183 
/**************************
 * Post increment and post decrement.
 *
 * Emits code that loads the current value of the lvalue e.EV.E1 as the
 * result and then updates the lvalue in place by the operand e.EV.E2
 * (added for OPpostinc, subtracted for OPpostdec).
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the post increment / post decrement elem
 *      pretregs = mask of where the result is wanted; if 0 no result is
 *                 needed and the work is delegated to cdaddass()
 */

@trusted
void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                        // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        // Prefer the XMM generator, then the inline x87 generator
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
// Neither XMM nor inline x87 was selected: do the arithmetic through the
// runtime library helpers (CLIB.dadd/dsub/fadd/fsub). Only generated for
// EX_windos targets; other targets fall out of this if-block.
if (config.exe & EX_windos)
{
        assert(sz <= 8);
        getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
        freenode(e.EV.E1);
        regm_t idxregs = idxregm(&cs);  // mask of index regs used
        cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
        fltregs(cdb,&cs,tyml);
        stackchanged = 1;
        int stackpushsave = stackpush;
        regm_t retregs;
        // Push the value just loaded; retregs then selects where the
        // second operand (e2) gets evaluated
        if (sz == 8)
        {
            if (I32)
            {
                cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                cdb.gen1(0x50 + AX);
                stackpush += DOUBLESIZE;
                retregs = DOUBLEREGS2_32;
            }
            else
            {
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                stackpush += DOUBLESIZE + DOUBLESIZE;

                // The value is pushed a second time here; stackpush above
                // already accounts for both copies
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                retregs = DOUBLEREGS_16;
            }
        }
        else
        {
            stackpush += FLOATSIZE;     /* so we know something is on   */
            if (!I32)
                cdb.gen1(0x50 + DX);
            cdb.gen1(0x50 + AX);
            retregs = FLOATREGS2;
        }
        cdb.genadjesp(stackpush - stackpushsave);

        // Evaluate e2 with cgstate.stackclean raised for the duration
        cgstate.stackclean++;
        scodelem(cdb,e2,&retregs,idxregs,false);
        cgstate.stackclean--;

        // Call the library routine performing the add or subtract
        if (tyml == TYdouble || tyml == TYdouble_alias)
        {
            retregs = DOUBLEREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
                    &retregs,idxregs);
        }
        else /* tyml == TYfloat */
        {
            retregs = FLOATREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
                    &retregs,idxregs);
        }
        cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
        fltregs(cdb,&cs,tyml);
        stackpushsave = stackpush;
        // Pop the value saved earlier into the result registers, unless a
        // double result is wanted on the stack
        if (tyml == TYdouble || tyml == TYdouble_alias)
        {   if (*pretregs == mSTACK)
                retregs = mSTACK;       /* leave result on stack        */
            else
            {
                if (I32)
                {
                    cdb.gen1(0x58 + AX);
                    cdb.gen1(0x58 + DX);
                }
                else
                {
                    cdb.gen1(0x58 + DX);
                    cdb.gen1(0x58 + CX);
                    cdb.gen1(0x58 + BX);
                    cdb.gen1(0x58 + AX);
                }
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS;
            }
        }
        else
        {
            cdb.gen1(0x58 + AX);
            if (!I32)
                cdb.gen1(0x58 + DX);
            stackpush -= FLOATSIZE;
            retregs = FLOATREGS;
        }
        cdb.genadjesp(stackpush - stackpushsave);
        fixresult(cdb,e,retregs,pretregs);
        return;
}
    }
    // Remaining XMM register types are handled by the XMM generator
    if (tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    // Everything below requires the increment amount to be a constant
    assert(e2.Eoper == OPconst);
    uint isbyte = (sz == 1);
    regm_t possregs = isbyte ? BYTEREGS : allregs;
    getlvalue(cdb,&cs,e.EV.E1,0);
    freenode(e.EV.E1);
    regm_t idxregs = idxregm(&cs);       // mask of index regs used
    // Case 1: only the flags are wanted and the lvalue's EA has mod == 3
    // (a register operand). For 16 bit code the register must also be one of
    // BX, SI, DI, BP, since the LEA below uses it as the address register.
    if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
        (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
    {
        // Generate:
        //      TEST    reg,reg
        //      LEA     reg,n[reg]      // don't affect flags
        reg_t reg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            reg |= 8;
        cs.Iop = 0x85 ^ isbyte;
        code_newreg(&cs, reg);
        cs.Iflags |= CFpsw;
        cdb.gen(&cs);             // TEST reg,reg

        // If lvalue is a register variable, we must mark it as modified
        modEA(cdb,&cs);

        auto n = e2.EV.Vint;
        if (op == OPpostdec)
            n = -n;
        int rm = reg;
        if (I16)
        {
            // Maps a register number to the 16 bit r/m encoding that uses
            // it as the address register; -1 for registers without one
            static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
            rm = regtorm[reg];
        }
        cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
        return;
    }
    // Case 2: value fits in a register, or is a far pointer (tyfv) whose
    // offset part is what gets incremented
    else if (sz <= REGSIZE || tyfv(tyml))
    {
        code cs2 = void;

        cs.Iop = 0x8B ^ isbyte;
        regm_t retregs = possregs & ~idxregs & *pretregs;
        if (!tyfv(tyml))
        {
            if (retregs == 0)
                retregs = possregs & ~idxregs;
        }
        else /* tyfv(tyml) */
        {
            if ((retregs &= mLSW) == 0)
                retregs = mLSW & ~idxregs;
            /* Can't use LES if the EA uses ES as a seg override    */
            if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
            {   cs.Iop = 0xC4;                      /* LES          */
                getregs(cdb,mES);           // allocate ES
            }
        }
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        code_newreg(&cs, reg);
        if (sz == 1 && I64 && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);                     // MOV reg,EA
        cs2 = cs;                         // remember the load for the scheduling variant below

        /* If lvalue is a register variable, we must mark it as modified */
        modEA(cdb,&cs);

        // Turn cs into the update instruction: ADD/SUB EA,const
        cs.Iop = 0x81 ^ isbyte;
        cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
        cs.Irex &= ~REX_R;
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                  */
        cs.IFL2 = FLconst;
        targ_int n = e2.EV.Vint;
        cs.IEV2.Vint = n;
        if (n == 1)                     /* can use INC or DEC           */
        {
            cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
            if (op == OPpostdec)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }
        else if (n == -1)               // can use INC or DEC
        {
            cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
            if (op == OPpostinc)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }

        // For scheduling purposes, we wish to replace:
        //      MOV     reg,EA
        //      OP      EA
        // with:
        //      MOV     reg,EA
        //      OP      reg
        //      MOV     EA,reg
        //      ~OP     reg
        if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
            config.target_cpu >= TARGET_Pentium &&
            config.flags4 & CFG4speed)
        {
            // Replace EA in cs with reg
            cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
            if (reg & 8)
            {   cs.Irex &= ~REX_R;
                cs.Irex |= REX_B;
            }
            else
                cs.Irex &= ~REX_B;
            if (I64 && sz == 1 && reg >= 4)
                cs.Irex |= REX;
            cdb.gen(&cs);                        // ADD/SUB reg,const

            // Reverse MOV direction
            cs2.Iop ^= 2;
            cdb.gen(&cs2);                       // MOV EA,reg

            // Toggle INC <. DEC, ADD <. SUB
            cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
            cdb.gen(&cs);

            if (*pretregs & mPSW)
            {   *pretregs &= ~mPSW;              // flags already set
                code_orflag(cdb.last(),CFpsw);
            }
        }
        else
            cdb.gen(&cs);                        // ADD/SUB EA,const

        freenode(e2);
        if (tyfv(tyml))
        {
            // Far pointer: also load the other half (EA+2) into ES or an
            // MSW register
            reg_t preg;

            getlvalue_msw(&cs);
            if (*pretregs & mES)
            {
                preg = ES;
                /* ES is already loaded if CFes is 0            */
                cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
                NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
            }
            else
            {
                regm_t retregsx = *pretregs & mMSW;
                if (!retregsx)
                    retregsx = mMSW;
                allocreg(cdb,&retregsx,&preg,TYint);
                cs.Iop = 0x8B;
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
            }
            getregs(cdb,mask(preg));
            cdb.gen(&cs);
            retregs = mask(reg) | mask(preg);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    // Case 3: TYhptr. The carry out of the low word add is shifted left by
    // the __AHSHIFT amount and added to the upper word (EA+2).
    else if (tyml == TYhptr)
    {
        uint rvalue;
        reg_t lreg;
        reg_t rtmp;
        regm_t mtmp;

        rvalue = e2.EV.Vlong;
        freenode(e2);

        // If h--, convert to h++
        if (e.Eoper == OPpostdec)
            rvalue = -rvalue;

        regm_t retregs = mLSW & ~idxregs & *pretregs;
        if (!retregs)
            retregs = mLSW & ~idxregs;
        allocreg(cdb,&retregs,&lreg,TYint);

        // Can't use LES if the EA uses ES as a seg override
        if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
        {   cs.Iop = 0xC4;
            retregs |= mES;
            getregs(cdb,mES|mCX);       // allocate ES
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // LES lreg,EA
        }
        else
        {   cs.Iop = 0x8B;
            retregs |= mDX;
            getregs(cdb,mDX|mCX);
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // MOV lreg,EA
            NEWREG(cs.Irm,DX);
            getlvalue_msw(&cs);
            cdb.gen(&cs);                       // MOV DX,EA+2
            getlvalue_lsw(&cs);
        }

        // Allocate temporary register, rtmp
        mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
        allocreg(cdb,&mtmp,&rtmp,TYint);

        movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
        getregs(cdb,mtmp);
        cs.Iop = 0x81;
        NEWREG(cs.Irm,0);
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = rvalue;
        cdb.gen(&cs);                           // ADD EA,e2
        code_orflag(cdb.last(),CFpsw);          // the ADC below needs this carry
        cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
        genshift(cdb);                          // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
        cs.Iop = 0x01;
        NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
        getlvalue_msw(&cs);
        cdb.gen(&cs);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    // Case 4: value occupies a register pair. Load both halves, then update
    // memory with ADD/ADC or SUB/SBB.
    else if (sz == 2 * REGSIZE)
    {
        regm_t retregs = allregs & ~idxregs & *pretregs;
        if ((retregs & mLSW) == 0)
                retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
                retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                          */
        getlvalue_lsw(&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.EV.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);  // the ADC/SBB below needs this carry
        getlvalue_msw(&cs);
        // NOTE(review): this store is overwritten a few lines down before cs is emitted
        cs.IEV2.Vlong = 0;
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
        // Immediate for the upper half is the upper part of the constant
        cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA,0
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}
5590 
5591 
/*******************************************
 * Code generator entry that always fails.
 * In debug builds the offending elem is printed first; then assert(0).
 *
 * Params:
 *      cdb = unused
 *      e = elem that was dispatched here
 *      pretregs = unused
 */
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        elem_print(e);
    }

    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);
    assert(0);
}
5602 
/*******************************************
 * Generate code for an elem whose left operand carries information about
 * the right operand. Only a left operand of OPdctor is supported:
 * e.EV.E2 is generated into pretregs first, then e.EV.E1 with no result.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the elem; e.EV.E1 must be an OPdctor
 *      pretregs = mask of where the result of e.EV.E2 is wanted
 */
@trusted
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (e.EV.E1.Eoper != OPdctor)
        assert(0);

    codelem(cdb,e.EV.E2,pretregs,false);

    regm_t noresult = 0;
    codelem(cdb,e.EV.E1,&noresult,false);
}
5617 
/*******************************************
 * D constructor.
 * OPdctor
 *
 * Emits the pseudo instruction that marks the start of an EH range,
 * followed by the store of the scope index.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the OPdctor elem, recorded in the marker instruction
 *      pretregs = must point to 0, no result is produced
 */

@trusted
void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate:
        ESCAPE | ESCdctor
        MOV     sindex[BP],index
     */
    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }
    assert(*pretregs == 0);

    // Pseudo instruction marking the start of the EH range
    code marker;
    marker.Iop = ESCAPE | ESCdctor;
    marker.Irex = 0;
    marker.Iflags = 0;
    marker.IEV1.Vtor = e;
    marker.IFL1 = FLctor;
    cdb.gen(&marker);

    // The actual index will be patched in later by except_fillInEHTable()
    nteh_gensindex(cdb,0);
}
5646 
/*******************************************
 * D destructor.
 * OPddtor
 *
 * Emits the pseudo instruction that marks the end of an EH range. With
 * DWARF exception handling the destructor code (e.EV.E1) follows inline.
 * Otherwise the destructor code is emitted as a local subroutine that is
 * CALLed and then jumped over.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the OPddtor elem; e.EV.E1 is the destructor code
 *      pretregs = must point to 0, no result is produced
 */

@trusted
void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const dwarf = config.ehmethod == EHmethod.EH_DWARF;

    usednteh |= EHcleanup;
    if (!dwarf && config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    // Mark end of EH range (and, for DWARF, where the landing pad is)
    code marker;
    marker.Iop = ESCAPE | ESCddtor;
    marker.Irex = 0;
    marker.Iflags = 0;
    marker.IEV1.Vtor = e;
    marker.IFL1 = FLdtor;
    cdb.gen(&marker);

    if (!dwarf)
    {
        // The actual index will be patched in later by except_fillInEHTable()
        nteh_gensindex(cdb,0);
    }

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    if (dwarf)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    /* Generate (after the marker and sindex store above):
            CALL    dtor
            JMP     L1
        Ldtor:
            ... e.EV.E1 ...
            RET
        L1: NOP
     */

    // Build the destructor subroutine in its own code builder
    CodeBuilder dtorBody;
    dtorBody.ctor();
    codelem(dtorBody,e.EV.E1,pretregs,false);
    dtorBody.gen1(0xC3);                            // RET
    code *dtorEntry = dtorBody.finish();

    // Adjust the stack around the CALL when STACKALIGN is 16 or more
    int nalign = 0;
    if (STACKALIGN >= 16)
    {
        nalign = STACKALIGN - REGSIZE;
        cod3_stackadj(cdb, nalign);
    }
    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorEntry); // CALL Ldtor
    if (nalign)
        cod3_stackadj(cdb, -nalign);

    // Jump over the subroutine body to the trailing NOP
    code *skip = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)skip);
    cdb.append(dtorBody);
    cdb.append(skip);
}
5732 
5733 
/*******************************************
 * C++ constructor.
 *
 * Generates no code; all parameters are ignored.
 */

@trusted
void cdctor(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // intentionally empty
}
5742 
/*******************************************
 * OPdtor
 *
 * Generates no code; all parameters are ignored.
 */
void cddtor(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // intentionally empty
}
5749 
/*******************************************
 * Code generator entry that emits nothing; all parameters are ignored.
 */
void cdmark(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // intentionally empty
}
5753 
static if (!NTEXCEPTIONS)
{
    /*******************************************
     * Placeholder compiled only when NTEXCEPTIONS is false.
     * Always fails with assert(0).
     */
    void cdsetjmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
    {
        assert(0);
    }
}
5761 
/*****************************************
 * Generate code for the operand e.EV.E1, producing no result.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the elem whose operand is generated
 *      pretregs = must point to 0
 */

@trusted
void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    elem* operand = e.EV.E1;
    codelem(cdb,operand,pretregs,false);
}
5771 
/*****************************************
 * Emit a single trapping instruction: UD2 when the target CPU is at least
 * TARGET_80286, otherwise INT3.
 *
 * Params:
 *      cdb = code builder receiving the instruction
 *      e = unused
 *      pretregs = must point to 0
 */

@trusted
void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    if (config.target_cpu >= TARGET_80286)
        cdb.gen1(UD2);
    else
        cdb.gen1(INT3);
}