1 /**
2  * Code generation 4
3  *
4  * Includes:
 * - assignment variations of operators (+= -= *= /= %= <<= >>=)
6  * - integer comparison (< > <= >=)
7  * - converting integers to a different size (e.g. short to int)
8  * - bit instructions (bit scan, population count)
9  *
10  * Compiler implementation of the
11  * $(LINK2 https://www.dlang.org, D programming language).
12  *
13  * Mostly code generation for assignment operators.
14  *
15  * Copyright:   Copyright (C) 1985-1998 by Symantec
16  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
17  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
18  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
19  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
20  * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
21  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
22  */
23 
24 module dmd.backend.cod4;
25 
26 import core.stdc.stdio;
27 import core.stdc.stdlib;
28 import core.stdc.string;
29 
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.code_x86;
34 import dmd.backend.codebuilder;
35 import dmd.backend.mem;
36 import dmd.backend.el;
37 import dmd.backend.global;
38 import dmd.backend.oper;
39 import dmd.backend.ty;
40 import dmd.backend.evalu8 : el_toldoubled;
41 import dmd.backend.xmm;
42 
43 
44 nothrow:
45 @safe:
46 
47 import dmd.backend.cg : datafl;
48 
/* Successor table indexed by register number (slots are AX,CX,DX,BX as
 * labelled below). cdeq() uses it in its 16 bit double store loop to step
 * AX -> BX -> CX -> DX, one word per iteration. The DX slot is NOREG,
 * i.e. DX has no successor.
 */
                        /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];
51 
52 import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients;
53 
/*******************************
 * Count the references to a symbol inside an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the expression tree to search
 * Returns:
 *      number of OPvar leaves in e whose symbol is s
 */

@trusted
private int intree(Symbol *s,elem *e)
{
    const OPER oper = e.Eoper;

    // A leaf contributes 1 only when it is a variable reference to s
    if (OTleaf(oper))
        return (oper == OPvar && e.EV.Vsym == s) ? 1 : 0;

    // Interior node: the first operand always exists, the second only
    // for binary operators
    int count = intree(s,e.EV.E1);
    if (OTbinary(oper))
        count += intree(s,e.EV.E2);
    return count;
}
65 
/***********************************
 * Decide whether expression e may be computed directly into the
 * register variable s.
 * Expressions such as x=x+x+x and x=a+x need care, because s itself
 * appears among the operands.
 * Params:
 *      s = the register variable that would receive the result
 *      e = expression to be evaluated
 * Returns:
 *      1       e can be evaluated into s
 *      0       it cannot
 */

@trusted
int doinreg(Symbol *s, elem *e)
{
    // Walk down the chain of left operands until a verdict is reached
    for (;;)
    {
        const OPER op = e.Eoper;

        // Indirections, calls and leaves are always accepted
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        // So is any expression that does not mention s at all
        const int uses = intree(s,e);
        if (uses == 0)
            return 1;

        // ... and a unary operator applied directly to a leaf
        if (OTunary(op) && OTleaf(e.EV.E1.Eoper))
            return 1;

        // More than one reference to s: give up
        if (uses != 1)
            return 0;

        // Exactly one reference: acceptable only for these operators,
        // and only if the reference is not in the right operand
        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                break;

            default:
                return 0;
        }
        if (intree(s,e.EV.E2))
            return 0;

        e = e.EV.E1;            // the reference is on the left; examine it
    }
}
114 
/****************************
 * Prepare for modifying an EA (effective address) operand.
 * If the addressing mode of c names a register rather than memory,
 * call getregs() on that register so that anything currently cached in
 * it (such as a common subexpression) is dealt with before the
 * register is overwritten.
 * Call this just before modifying an EA.
 * Params:
 *      cdb = code builder passed through to getregs()
 *      c = instruction whose mod/rm byte (and REX prefix) describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Top two bits of the mod/rm byte both set => operand is a register
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t reg = c.Irm & 7;      // low three bits of the register number
    if (c.Irex & REX_B)
    {
        reg |= 8;               // REX.B supplies the fourth bit
        assert(I64);
    }
    getregs(cdb,mask(reg));
}
133 
134 
/****************************
 * Gen code for op= for doubles.
 * When config.inline8087 is set the work is handed to opass87();
 * otherwise the lvalue is loaded (or pushed), the rvalue is evaluated,
 * a runtime library routine selected from clibtab is called, and the
 * result is stored back into the lvalue.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the op= expression; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = requested result registers, passed on to fixresult()
 *      op = operator; must lie in OPpostinc..OPdivass as it indexes clibtab
 */
@trusted
private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    // Library routine for each operator, indexed by (op - OPpostinc).
    // The OPeq slot holds -1 as a placeholder.
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = LOD;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // The float does not fit in one register: load the other
            // word into DX, then point the EA back at the low word
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = LOD;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // Four pushes, starting at offset DOUBLESIZE - REGSIZE.
            // NOTE(review): presumably each getlvalue_lsw() moves the EA
            // down to the next lower word - confirm against its definition
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // idxregs is handed to scodelem() below along with the rvalue; it
    // covers the loaded operand, the EA's index registers, and ES when
    // the EA uses an ES segment override
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = STO;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}
234 
/****************************
 * Gen code for OPnegass for doubles.
 * When config.inline8087 is set the work is handed to cdnegass87();
 * otherwise the value is negated in place by XORing 0x80 into its
 * highest byte, and reloaded into registers if a result is wanted or
 * the lvalue is a CSE.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the negate-assign expression; e.EV.E1 is the lvalue
 *      pretregs = requested result registers, passed on to fixresult()
 */

@trusted
private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    // Address the last byte of the operand and XOR it with 0x80
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        // Complex: repeat the XOR half the operand size lower, which is
        // the last byte of the first half
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // Undo the sz - 1 adjustment made before the XOR.
        // NOTE(review): for complex types the offset was also lowered by
        // sz / 2 above, so this does not return to the operand's base -
        // confirm whether a reload is ever requested for complex types
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = LOD;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = LOD;
                // Clear the reg field left over from the XOR before
                // selecting AX
                cs.Irm &= ~cast(uint)modregrm(0,7,0);
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                // Only the first branch is compiled; the push sequence
                // in the else branch is disabled
                static if (1)
                {
                    cs.Iop = LOD;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    // Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE   */
    }
    else
    {
        // No result wanted and the lvalue is not a CSE: nothing to load
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}
343 
344 
345 
/************************
 * Generate code for an assignment.
 *
 * Outline:
 *  1. XMM and x87 operands are handed off to xmmeq(), complex_eq87() or
 *     eq87().
 *  2. If no result is wanted and the rvalue is a constant or a suitable
 *     address constant, it is stored straight into the lvalue
 *     (MOV EA,imm and variations), then control jumps to Lp.
 *  3. Otherwise the rvalue is evaluated into registers (directly into
 *     the target register when the lvalue is a register variable and
 *     doinreg() allows it) and stored into the lvalue word by word.
 *  4. At Lp, a pending post-increment/decrement from the (*p++ = ...)
 *     special case is applied to the pointer register.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the assignment expression; e.EV.E1 is the lvalue, e.EV.E2
 *          the rvalue
 *      pretregs = requested result registers/flags; its mPSW bit may be
 *          cleared here when the flags request is moved onto the rvalue
 */

@trusted
void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        // Use the x87 unless no result is wanted and the rvalue is a
        // plain constant, variable or indirection
        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                     // if no return value
    {
        int fl;

        /* If registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        // Direct store of an immediate: the rvalue must be a constant,
        // or an address constant that passes the checks below (not
        // 64 bit PIC/Win64, fl is FLdata/FLudata/FLextern, symbol not
        // mTYcs); skipped if the rvalue should be evaluated in a
        // register while registers are plentiful, or if e1 is a CSE
        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
              && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                // Remember the adjustment; it is applied at Lp after the store
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                // Pentium / Pentium MMX, optimizing for speed, and the
                // top two mod/rm bits are 10: go through a register
                // (MOV reg,imm; MOV EA,reg) instead of MOV EA,imm
                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,regx,64);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,regx,0);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,regx,0);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            // 0xC6 is the byte form and 0xC7 the word form of the
            // MOV EA,imm generated below
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;

            if (e2oper == OPrelconst)
            {
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    // Wider than a register: also store the segment
                    // into the upper part of the lvalue
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // View the constant's storage as target-sized words
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        // Load the upper half of the constant into the
                        // register now named by the EA
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,regx,64);
                    cs.Iop = STO;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant to memory one chunk at a time;
                    // off counts the bytes still to be written
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            // This chunk is >= 0x80000000, so store it
                            // from a register loaded with the 64 bit value
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,regx,64);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            // If some register already holds the value,
                            // store from it rather than use an immediate
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            if (reghasvalue(retregsx,*p,regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);           // MOV EA,const

                        // Advance to the next chunk and reset cs to the
                        // immediate form for it
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    // Only the flags were requested: a register is still needed to hold
    // the rvalue
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = STO;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
           (
             (e1.Eoper == OPind &&
                ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
             (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
           )
          )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,varregm,varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&       // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            if (varregm & XMMREGS)
            {
                // Could be an integer vector in the XMMREGS
                xmmeq(cdb, e, CMP, e1, e2, pretregs);
                return;
            }
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            // Assigning one register-sized half of a variable that
            // occupies a register pair: narrow to the half selected by
            // the offset
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        // Ask for the flags from the rvalue evaluation instead, and
        // drop the request from *pretregs
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        // Remember the adjustment; it is applied at Lp after the store
        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        // Far pointer whose segment half is in ES
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            // 32/64 bit code: store the low register first, then the
            // high register if the value is wider than one register
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            // 16 bit code: start at the highest word and work down
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    // Doubles step through the registers via dblreg[];
                    // other types have only the low register left
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);       // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Apply the deferred pointer adjustment from the (*p++ = ...) case
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            // Opcode 0x81 with reg field 0: ADD ireg,postinc
            cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + ireg);        // INC ireg
            else if (postinc == -cast(targ_int)1)
                cdb.gen1(0x48 + ireg);        // DEC ireg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
            }
        }
    }
    freenode(e1);
}
841 
842 
/************************
 * Generate code for the read-modify-write operators
 * += -= &= |= ^= and negass (in-place negate).
 * OPpostinc and OPpostdec are also accepted and are handled as
 * += and -= respectively.
 *
 * XMM and floating point lvalues are forwarded to xmmopass(),
 * opass87()/cdnegass87() or opassdbl()/opnegassdbl(). Everything else is
 * emitted here as integer instructions operating on the effective
 * address (EA) of the lvalue, followed by an optional reload of the
 * result into registers.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the operator elem; e.EV.E1 is the lvalue and, for every
 *          operator except negass, e.EV.E2 is the rvalue
 *      pretregs = mask of where the result is wanted (mPSW means the
 *          flags); mPSW may be cleared here before the mask is handed
 *          to fixresult()
 */

@trusted
void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    OPER op = e.Eoper;
    regm_t retregs = 0;
    uint reverse = 0;                           // set to 2 when the direction bit of the opcode was toggled
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];
    int isbyte = (sz == 1);                     // 1 for byte operation, else 0

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    // Remaining floating point cases are handled by dedicated helpers
    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            if (op == OPnegass)
                cdnegass87(cdb,e,pretregs);
            else
                opass87(cdb,e,pretregs);
        }
        else
        {
            if (op == OPnegass)
                opnegassdbl(cdb,e,pretregs);
            else
                opassdbl(cdb,e,pretregs,op);
        }
        return;
    }
    // Operand size prefix for a 32 bit operand in 16 bit code on a 386 or later
    uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386)
        ? CFopsize : 0;
    uint cflags = 0;                             // flag bits or'ed into the low word instruction of a two word operation
    regm_t forccs = *pretregs & mPSW;            // return result in flags
    regm_t forregs = *pretregs & ~mPSW;          // return result in regs
    // true if we want the result in a register
    uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper));

    reg_t reg;
    uint op1,op2,mode;
    code cs;
    elem *e2;
    regm_t varregm;
    reg_t varreg;
    uint jop;


    /* op1  = opcode of the "OP EA,reg" form, used for the low (or only) word
     * op2  = opcode used for the high word of a two word operand
     * mode = value for the reg field of the modregrm byte that selects the
     *        same operation when the immediate form (opcode 0x81) is used
     */
    switch (op)                   // select instruction opcodes
    {
        case OPpostinc: op = OPaddass;                  // i++ => +=
                        goto case OPaddass;

        case OPaddass:  op1 = 0x01; op2 = 0x11;
                        cflags = CFpsw;
                        mode = 0; break;                // ADD, ADC

        case OPpostdec: op = OPminass;                  // i-- => -=
                        goto case OPminass;

        case OPminass:  op1 = 0x29; op2 = 0x19;
                        cflags = CFpsw;
                        mode = 5; break;                // SUB, SBB

        case OPandass:  op1 = op2 = 0x21;
                        mode = 4; break;                // AND, AND

        case OPorass:   op1 = op2 = 0x09;
                        mode = 1; break;                // OR , OR

        case OPxorass:  op1 = op2 = 0x31;
                        mode = 6; break;                // XOR, XOR

        case OPnegass:  op1 = 0xF7;                     // NEG
                        break;

        default:
                assert(0);
    }
    op1 ^= isbyte;                  // bit 0 is 0 for byte operation

    if (op == OPnegass)
    {
        // NEG EA is opcode 0xF7 (0xF6 for bytes) with 3 in the reg field
        getlvalue(cdb,&cs,e1,0);
        modEA(cdb,&cs);
        cs.Irm |= modregrm(0,3,0);
        cs.Iop = op1;
        switch (_tysize[tyml])
        {
            case CHARSIZE:
                cdb.gen(&cs);
                break;

            case SHORTSIZE:
                cdb.gen(&cs);
                // Outside of 16 bit code, when the flags are wanted, mark the
                // instruction as a 16 bit operation whose flags are significant
                if (!I16 && *pretregs & mPSW)
                    cdb.last().Iflags |= CFopsize | CFpsw;
                break;

            case LONGSIZE:
                if (!I16 || opsize)
                {   cdb.gen(&cs);
                    cdb.last().Iflags |= opsize;
                    break;
                }
            neg_2reg:
                // Negate a two word operand one word at a time. cs.Irm still
                // has 3 in the reg field, which with opcode 0x81 selects SBB.
                getlvalue_msw(&cs);
                cdb.gen(&cs);              // NEG EA+2
                getlvalue_lsw(&cs);
                cdb.gen(&cs);              // NEG EA
                code_orflag(cdb.last(),CFpsw);
                cs.Iop = 0x81;
                getlvalue_msw(&cs);
                cs.IFL2 = FLconst;
                cs.IEV2.Vuns = 0;
                cdb.gen(&cs);              // SBB EA+2,0
                break;

            case LLONGSIZE:
                if (I16)
                    assert(0);             // not implemented yet
                if (I32)
                    goto neg_2reg;
                cdb.gen(&cs);
                break;

            default:
                assert(0);
        }
        forccs = 0;             // flags already set by NEG
        *pretregs &= ~mPSW;
    }
    else if ((e2 = e.EV.E2).Eoper == OPconst &&    // if rvalue is a const
             el_signx32(e2) &&
             // Don't evaluate e2 in register if we can use an INC or DEC
             (((sz <= REGSIZE || tyfv(tyml)) &&
               (op == OPaddass || op == OPminass) &&
               (el_allbits(e2, 1) || el_allbits(e2, -1))
              ) ||
              (!evalinregister(e2)
               && tyml != TYhptr
              )
             )
            )
    {
        // Constant rvalue: operate on EA with an immediate operand
        getlvalue(cdb,&cs,e1,0);
        modEA(cdb,&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vsize_t = e2.EV.Vint;
        if (sz <= REGSIZE || tyfv(tyml) || opsize)
        {
            targ_int i = cs.IEV2.Vint;

            // Handle shortcuts. Watch out for if result has
            // to be in flags.

            // If reghasvalue() reports a register for the constant (and the
            // constant is not 1 or -1, which are done with INC/DEC below),
            // use the "OP EA,reg" form instead of an immediate
            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,reg) && i != 1 && i != -1 &&
                !opsize)
            {
                cs.Iop = op1;
                cs.Irm |= modregrm(0,reg & 7,0);
                if (I64)
                {   if (isbyte && reg >= 4)
                        cs.Irex |= REX;
                    if (reg & 8)
                        cs.Irex |= REX_R;
                }
            }
            else
            {
                cs.Iop = 0x81;
                cs.Irm |= modregrm(0,mode,0);
                switch (op)
                {
                    case OPminass:      // convert to +=
                        cs.Irm ^= modregrm(0,5,0);      // clear the SUB selector (mode 5), leaving ADD (0)
                        i = -i;
                        cs.IEV2.Vsize_t = i;
                        goto case OPaddass;

                    case OPaddass:
                        // Opcode 0xFF with reg field 0 is INC, with reg field 1 is DEC
                        if (i == 1)             // INC EA
                                goto L1;
                        else if (i == -1)       // DEC EA
                        {       cs.Irm |= modregrm(0,1,0);
                           L1:  cs.Iop = 0xFF;
                        }
                        break;

                    default:
                        break;
                }
                cs.Iop ^= isbyte;             // for byte operations
            }
            cs.Iflags |= opsize;
            if (forccs)
                cs.Iflags |= CFpsw;
            else if (!I16 && cs.Iflags & CFopsize)
            {
                // The flags are not needed, so try to drop the operand size
                // prefix by widening the operation in a way that leaves the
                // bits above the low 16 unchanged
                switch (op)
                {   case OPorass:
                    case OPxorass:
                        cs.IEV2.Vsize_t &= 0xFFFF;
                        cs.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPandass:
                        cs.IEV2.Vsize_t |= ~0xFFFFL;
                        cs.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPminass:
                    case OPaddass:
                        static if (1)
                        {
                            if ((cs.Irm & 0xC0) == 0xC0)    // EA is register
                                cs.Iflags &= ~CFopsize;
                        }
                        else
                        {
                            if ((cs.Irm & 0xC0) == 0xC0 &&  // EA is register and
                                e1.Eoper == OPind)          // not a register var
                                cs.Iflags &= ~CFopsize;
                        }
                        break;

                    default:
                        assert(0);
                }
            }

            // For scheduling purposes, we wish to replace:
            //    OP    EA
            // with:
            //    MOV   reg,EA
            //    OP    reg
            //    MOV   EA,reg
            // This is only done when the result is wanted in a register, EA is
            // not already a register, the target is a Pentium or Pentium MMX,
            // and CFG4speed is set.
            if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
                (config.target_cpu == TARGET_Pentium ||
                 config.target_cpu == TARGET_PentiumMMX) &&
                config.flags4 & CFG4speed)
            {
                regm_t sregm;
                code cs2;

                // Determine which registers to use
                sregm = allregs & ~idxregm(&cs);
                if (isbyte)
                    sregm &= BYTEREGS;
                if (sregm & forregs)
                    sregm &= forregs;

                allocreg(cdb,&sregm,&reg,tyml);      // allocate register

                cs2 = cs;
                cs2.Iflags &= ~CFpsw;
                cs2.Iop = LOD ^ isbyte;
                code_newreg(&cs2, reg);
                cdb.gen(&cs2);                      // MOV reg,EA

                // Keep the operation selector in the reg field, make reg the operand
                cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cdb.gen(&cs);                       // OP reg

                cs2.Iop ^= 2;                       // flip the direction of the MOV
                cdb.gen(&cs2);                      // MOV EA,reg

                retregs = sregm;
                wantres = 0;                        // result is already in reg, no reload needed
                if (e1.Ecount)
                    cssave(e1,retregs,!OTleaf(e1.Eoper));
            }
            else
            {
                cdb.gen(&cs);
                // cs may be reused by the reload code at the end of the function
                cs.Iflags &= ~opsize;
                cs.Iflags &= ~CFpsw;
                if (I16 && opsize)                     // if DWORD operand
                    cs.IEV1.Voffset += 2; // compensate for wantres code
            }
        }
        else if (sz == 2 * REGSIZE)
        {
            // Two word operand: operate on the low word, then on the high
            // word with the high word of the constant
            targ_uns msw;

            cs.Iop = 0x81;
            cs.Irm |= modregrm(0,mode,0);
            cs.Iflags |= cflags;
            cdb.gen(&cs);
            cs.Iflags &= ~CFpsw;

            getlvalue_msw(&cs);             // point to msw
            msw = cast(uint)MSREG(e.EV.E2.EV.Vllong);
            cs.IEV2.Vuns = msw;             // msw of constant
            switch (op)
            {
                case OPminass:
                    cs.Irm ^= modregrm(0,6,0);      // SUB => SBB
                    break;

                case OPaddass:
                    cs.Irm |= modregrm(0,2,0);      // ADD => ADC
                    break;

                default:
                    break;
            }
            cdb.gen(&cs);
        }
        else
            assert(0);
        freenode(e.EV.E2);        // don't need it anymore
    }
    else if (isregvar(e1,varregm,varreg) &&
             (e2.Eoper == OPvar || e2.Eoper == OPind) &&
            !evalinregister(e2) &&
             sz <= REGSIZE)               // deal with later
    {
        // The lvalue is the register variable varreg and the rvalue is
        // addressable: emit "OP varreg,EA(e2)" by toggling the direction bit
        getlvalue(cdb,&cs,e2,0);
        freenode(e2);
        getregs(cdb,varregm);
        code_newreg(&cs, varreg);
        if (I64 && sz == 1 && varreg >= 4)
            cs.Irex |= REX;
        cs.Iop = op1 ^ 2;                       // toggle direction bit
        if (forccs)
            cs.Iflags |= CFpsw;
        reverse = 2;                            // remember we toggled it
        cdb.gen(&cs);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else if ((op == OPaddass || op == OPminass) &&
             sz <= REGSIZE &&
             !e2.Ecount &&
             ((jop = jmpopcode(e2)) == JC || jop == JNC ||
              (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC)))
            )
    {
        /* The rvalue (possibly under a conversion) is a comparison whose
         * jump opcode is JC or JNC, so its result can be consumed straight
         * from the carry flag:
         * e1 += (x < y)    ADC EA,0
         * e1 -= (x < y)    SBB EA,0
         * e1 += (x >= y)   SBB EA,-1
         * e1 -= (x >= y)   ADC EA,-1
         */
        getlvalue(cdb,&cs,e1,0);             // get lvalue
        modEA(cdb,&cs);
        regm_t keepmsk = idxregm(&cs);       // registers used by EA, passed to scodelem() as the keep mask
        retregs = mPSW;
        if (OTconv(e2.Eoper))
        {
            scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true);
            freenode(e2);
        }
        else
            scodelem(cdb,e2,&retregs,keepmsk,true);
        cs.Iop = 0x81 ^ isbyte;                   // ADC EA,imm16/32
        uint regop = 2;                     // ADC
        if ((op == OPaddass) ^ (jop == JC))
            regop = 3;                          // SBB
        code_newreg(&cs,regop);
        cs.Iflags |= opsize;
        if (forccs)
            cs.Iflags |= CFpsw;
        cs.IFL2 = FLconst;
        cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0;    // immediate is 0 for JC, -1 for JNC
        cdb.gen(&cs);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else // evaluate e2 into register
    {
        retregs = (isbyte) ? BYTEREGS : ALLREGS;  // pick working reg
        if (tyml == TYhptr)
            retregs &= ~mCX;                    // need CX for shift count
        scodelem(cdb,e.EV.E2,&retregs,0,true);   // get rvalue
        getlvalue(cdb,&cs,e1,retregs);         // get lvalue
        modEA(cdb,&cs);
        cs.Iop = op1;
        if (sz <= REGSIZE || tyfv(tyml))
        {
            reg = findreg(retregs);
            code_newreg(&cs, reg);              // OP1 EA,reg
            if (sz == 1 && reg >= 4 && I64)
                cs.Irex |= REX;
            if (forccs)
                cs.Iflags |= CFpsw;
        }
        else if (tyml == TYhptr)
        {
            uint mreg = findregmsw(retregs);
            uint lreg = findreglsw(retregs);
            getregs(cdb,retregs | mCX);

            // If h -= l, convert to h += -l
            if (e.Eoper == OPminass)
            {
                cdb.gen2(0xF7,modregrm(3,3,mreg));      // NEG mreg
                cdb.gen2(0xF7,modregrm(3,3,lreg));      // NEG lreg
                code_orflag(cdb.last(),CFpsw);
                cdb.genc2(0x81,modregrm(3,3,mreg),0);   // SBB mreg,0
            }
            cs.Iop = 0x01;
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                               // ADD EA,lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,2,mreg),0);       // ADC mreg,0
            genshift(cdb);                              // MOV CX,offset __AHSHIFT
            cdb.gen2(0xD3,modregrm(3,4,mreg));          // SHL mreg,CL
            NEWREG(cs.Irm,mreg);                        // ADD EA+2,mreg
            getlvalue_msw(&cs);
        }
        else if (sz == 2 * REGSIZE)
        {
            cs.Irm |= modregrm(0,findreglsw(retregs),0);
            cdb.gen(&cs);                               // OP1 EA,reg+1
            code_orflag(cdb.last(),cflags);
            cs.Iop = op2;
            NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg
            getlvalue_msw(&cs);
        }
        else
            assert(0);
        cdb.gen(&cs);           // the only, or the high word, instruction set up above
        retregs = 0;            // to trigger a bug if we attempt to use it
    }

    // See if we need to reload result into a register.
    // Need result in registers in case we have a 32 bit
    // result and we want the flags as a result.
    if (wantres || (sz > REGSIZE && forccs))
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs;

            possregs = ALLREGS;
            if (isbyte)
                possregs = BYTEREGS;
            retregs = forregs & possregs;
            if (!retregs)
                retregs = possregs;

            // If reg field is destination
            if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5)
            {
                reg = (cs.Irm >> 3) & 7;
                if (cs.Irex & REX_R)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,tyml);
            }
            // If lvalue is a register, just use that register
            else if ((cs.Irm & 0xC0) == 0xC0)
            {
                reg = cs.Irm & 7;
                if (cs.Irex & REX_B)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,tyml);
            }
            else
            {
                allocreg(cdb,&retregs,&reg,tyml);
                cs.Iop = LOD ^ isbyte ^ reverse;
                code_newreg(&cs, reg);
                // NOTE(review): the other byte register paths in this function
                // set REX here, not REX_W - confirm that REX_W is intended
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX_W;
                cdb.gen(&cs);               // MOV reg,EA
            }
        }
        else if (tyfv(tyml) || tyml == TYhptr)
        {
            regm_t idxregs;

            if (tyml == TYhptr)
                getlvalue_lsw(&cs);
            idxregs = idxregm(&cs);
            // Pick a register pair that avoids the registers used to address EA
            retregs = forregs & ~idxregs;
            if (!(retregs & IDXREGS))
                retregs |= IDXREGS & ~idxregs;
            if (!(retregs & mMSW))
                retregs |= mMSW & ALLREGS;
            allocreg(cdb,&retregs,&reg,tyml);
            NEWREG(cs.Irm,findreglsw(retregs));
            if (retregs & mES)              // if want ES loaded
            {
                cs.Iop = 0xC4;
                cdb.gen(&cs);               // LES lreg,EA
            }
            else
            {
                cs.Iop = LOD;
                cdb.gen(&cs);               // MOV lreg,EA
                getlvalue_msw(&cs);
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);               // MOV mreg,EA+2
            }
        }
        else if (sz == 2 * REGSIZE)
        {
            regm_t idx = idxregm(&cs);
            retregs = forregs;
            if (!retregs)
                retregs = ALLREGS;
            allocreg(cdb,&retregs,&reg,tyml);
            cs.Iop = LOD;
            NEWREG(cs.Irm,reg);

            code csl = cs;
            NEWREG(csl.Irm,findreglsw(retregs));
            getlvalue_lsw(&csl);

            // If reg is also used to address EA, load it last so that the
            // address is still intact for the other load
            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}
1387 
/********************************
 * Generate code for *=
 *
 * XMM and floating point lvalues are forwarded to xmmopass(), opass87()
 * or opassdbl(). Integer lvalues that fit in a register are multiplied
 * with LEA sequences for a fixed set of constants, and otherwise with
 * IMUL/MUL. Lvalues of twice the register size are multiplied with an
 * inline instruction sequence or a call to CLIB.lmul.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the *= elem; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = mask of where the result is wanted; passed on to
 *          opAssStoreReg()/opAssStorePair()
 */

@trusted
void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    uint opr,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);   // unsigned if either operand is unsigned
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    // Remaining floating point cases are handled by dedicated helpers
    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int ss;             // scale of the first LEA: reg = reg + (reg << ss)
            int ss2 = 0;        // if not 0, scale of a second LEA: reg = reg << ss2
            int shift;          // shift count applied to sreg in the L5 sequence

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            switch (e2factor)
            {
                /* Factors of the form (2^ss + 1) * 2^ss2, i.e. 3, 5 or 9 times
                 * 1, 4 or 8. The even factors without ss2 (6, 10, 18) are
                 * finished with an ADD reg,reg.
                 */
                case 12:    ss = 1; ss2 = 2; goto L4;
                case 24:    ss = 1; ss2 = 3; goto L4;

                case 6:
                case 3:     ss = 1; goto L4;

                case 20:    ss = 2; ss2 = 2; goto L4;
                case 40:    ss = 2; ss2 = 3; goto L4;

                case 10:
                case 5:     ss = 2; goto L4;

                case 36:    ss = 3; ss2 = 2; goto L4;
                case 72:    ss = 3; ss2 = 3; goto L4;

                case 18:
                case 9:     ss = 3; goto L4;
                L4:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;
                    allocreg(cdb,&regm,&reg,tyml);
                    cs.Iop = LOD;
                    code_newreg(&cs,reg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                       // MOV reg,EA

                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
                    if (ss2)
                    {
                        cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrm(ss2,reg,5));
                        cdb.last().IFL1 = FLconst;
                        cdb.last().IEV1.Vint = 0;       // LEA reg,0[ss2*reg]
                    }
                    else if (!(e2factor & 1))    // if even factor
                    {
                        genregs(cdb,0x03,reg,reg); // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                /* 13 = 5 + 8 and 37 = 5 + 8 * 4. The result is built in reg
                 * from a copy of the operand in scratch register sreg;
                 * 26 and 74 are finished with an ADD reg,reg.
                 */
                case 37:
                case 74:    shift = 2;
                            goto L5;
                case 13:
                case 26:    shift = 0;
                            goto L5;
                L5:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;                          // return register
                    allocreg(cdb,&regm,&reg,tyml);

                    reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                    cs.Iop = LOD;
                    code_newreg(&cs,sreg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                                         // MOV sreg,EA

                    assert((sreg & 7) != BP);
                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                    if (shift)
                        cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                    if (!(e2factor & 1))                                  // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                default:
                    break;
            }
        }

        isbyte = (sz == 1);             // 1 for byte operation

        if (config.target_cpu >= TARGET_80286 &&
            e2.Eoper == OPconst && !isbyte)
        {
            // Multiply by an immediate. A 64 bit multiply whose constant does
            // not survive a round trip through int takes the register path at L1.
            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                goto L1;
            freenode(e2);
            getlvalue(cdb,&cs,e1,0);     // get EA
            regm_t idxregs = idxregm(&cs);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&resreg,tyml);
            cs.Iop = 0x69;                  // IMUL reg,EA,e2value
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = cast(int)e2factor;
            opr = resreg;
        }
        else if (!I16 && !isbyte)
        {
         L1:
            // Evaluate the rvalue into a register and multiply that register by EA
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&cs,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            cs.Iop = 0x0FAF;                        // IMUL resreg,EA
            resreg = findreg(retregs);
            opr = resreg;
        }
        else
        {
            // One operand form: the rvalue is loaded into AX and multiplied by EA
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);      // load rvalue in AX
            getlvalue(cdb,&cs,e1,mAX);           // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
            cs.Iop = 0xF7 ^ isbyte;                        // [I]MUL EA
            opr = uns ? 4 : 5;              // MUL/IMUL
            resreg = AX;                    // result register for *
        }
        // opr is the destination register for the two IMUL forms, and the
        // MUL/IMUL selector for the one operand form
        code_newreg(&cs,opr);
        cdb.gen(&cs);

        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        if (e2.Eoper == OPconst && I32)
        {
            /*  if (msw)
                  IMUL    EDX,EDX,lsw
                  IMUL    reg,EAX,msw
                  ADD     reg,EDX
                else
                  IMUL    reg,EDX,lsw
                MOV       EDX,lsw
                MUL       EDX
                ADD       EDX,reg
             */
            // NOTE(review): e2 is still read by el_tolong(e2) below, after this
            // freenode(e2) - presumably freenode() leaves the node readable; confirm
            freenode(e2);
            retregs = mDX|mAX;
            reg_t rhi, rlo;
            opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
            const regm_t keepmsk = idxregm(&cs);

            // Scratch register, distinct from EDX:EAX and from the registers addressing EA
            reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));    // low word of the constant
            const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));                 // high word of the constant

            if (msw)
            {
                genmulimm(cdb,DX,DX,lsw);          // IMUL EDX,EDX,lsw
                genmulimm(cdb,reg,AX,msw);         // IMUL reg,EAX,msw
                cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EDX
            }
            else
                genmulimm(cdb,reg,DX,lsw);         // IMUL reg,EDX,lsw

            movregconst(cdb,DX,lsw,0);             // MOV EDX,lsw
            getregs(cdb,mDX);
            cdb.gen2(0xF7,modregrm(3,4,DX));       // MUL EDX
            cdb.gen2(0x03,modregrm(3,DX,reg));     // ADD EDX,reg
        }
        else
        {
            retregs = mDX | mAX;
            // The rvalue goes in any pair outside DX:AX on a Pentium Pro or
            // later, otherwise in CX and BX for the CLIB.lmul call below
            regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
            codelem(cdb,e2,&rretregs,false);
            getlvalue(cdb,&cs,e1,retregs | rretregs);
            getregs(cdb,retregs);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            getlvalue_msw(&cs);
            cs.Irm |= modregrm(0,DX,0);
            cdb.gen(&cs);                   // MOV DX,EA+2
            getlvalue_lsw(&cs);
            if (config.target_cpu >= TARGET_PentiumPro)
            {
                // NOTE(review): rlo and rhi hold register numbers (they are
                // passed to mask() and modregrm()) although declared as regm_t
                regm_t rlo = findreglsw(rretregs);
                regm_t rhi = findregmsw(rretregs);
                /*  IMUL    rhi,EAX
                    IMUL    EDX,rlo
                    ADD     rhi,EDX
                    MUL     rlo
                    ADD     EDX,rhi
                 */
                 getregs(cdb,mAX|mDX|mask(rhi));
                 cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                 cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                 cdb.gen2(0x03,modregrm(3,rhi,DX));
                 cdb.gen2(0xF7,modregrm(3,4,rlo));
                 cdb.gen2(0x03,modregrm(3,DX,rhi));
            }
            else
            {
                callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
            }
        }

        // Store the register pair selected by retregs back into the lvalue
        opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}
1680 
1681 
1682 /********************************
1683  * Generate code for /= %=
1684  */
1685 
1686 @trusted
1687 void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1688 {
1689     elem *e1 = e.EV.E1;
1690     elem *e2 = e.EV.E2;
1691 
1692     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1693     OPER op = e.Eoper;                     // OPxxxx
1694 
1695     // See if evaluate in XMM registers
1696     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
1697     {
1698         xmmopass(cdb,e,pretregs);
1699         return;
1700     }
1701 
1702     if (tyfloating(tyml))
1703     {
1704         if (config.exe & EX_posix)
1705         {
1706             opass87(cdb,e,pretregs);
1707         }
1708         else
1709         {
1710             opassdbl(cdb,e,pretregs,op);
1711         }
1712         return;
1713     }
1714 
1715     code cs = void;
1716 
1717     //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
1718     char uns = tyuns(tyml) || tyuns(e2.Ety);
1719     uint sz = _tysize[tyml];
1720 
1721     uint rex = (I64 && sz == 8) ? REX_W : 0;
1722     uint grex = rex << 16;          // 64 bit operands
1723 
1724     if (sz <= REGSIZE)                  // if word or byte
1725     {
1726         uint isbyte = (sz == 1);        // 1 for byte operation
1727         reg_t resreg;
1728         targ_size_t e2factor;
1729         targ_size_t d;
1730         bool neg;
1731         int pow2;
1732 
1733         assert(!isbyte);                      // should never happen
1734         assert(I16 || sz != SHORTSIZE);
1735 
1736         if (e2.Eoper == OPconst)
1737         {
1738             e2factor = cast(targ_size_t)el_tolong(e2);
1739             pow2 = ispow2(e2factor);
1740             d = e2factor;
1741             if (!uns && cast(targ_llong)e2factor < 0)
1742             {
1743                 neg = true;
1744                 d = -d;
1745             }
1746         }
1747 
1748         // Signed divide by a constant
1749         if (config.flags4 & CFG4speed &&
1750             e2.Eoper == OPconst &&
1751             !uns &&
1752             (d & (d - 1)) &&
1753             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1754         {
1755             /* R1 / 10
1756              *
1757              *  MOV     EAX,m
1758              *  IMUL    R1
1759              *  MOV     EAX,R1
1760              *  SAR     EAX,31
1761              *  SAR     EDX,shpost
1762              *  SUB     EDX,EAX
1763              *  IMUL    EAX,EDX,d
1764              *  SUB     R1,EAX
1765              *
1766              * EDX = quotient
1767              * R1 = remainder
1768              */
1769             assert(sz == 4 || sz == 8);
1770 
1771             ulong m;
1772             int shpost;
1773             const int N = sz * 8;
1774             const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1775 
1776             freenode(e2);
1777 
1778             getlvalue(cdb,&cs,e1,mAX | mDX);
1779             reg_t reg;
1780             opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
1781             getregs(cdb, mAX|mDX);
1782 
1783             /* Algorithm 5.2
1784              * if m>=2**(N-1)
1785              *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1786              * else
1787              *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1788              * if (neg)
1789              *    q = -q
1790              */
1791             const bool mgt = mhighbit || m >= (1UL << (N - 1));
1792             movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1793             cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
1794             if (mgt)
1795                 cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
1796             getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1797             genmovreg(cdb, AX, reg);                                // MOV EAX,reg
1798             cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1799             if (shpost)
1800                 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1801             reg_t r3;
1802             if (neg && op == OPdivass)
1803             {
1804                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1805                 r3 = AX;
1806             }
1807             else
1808             {
1809                 cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1810                 r3 = DX;
1811             }
1812 
1813             // r3 is quotient
1814             reg_t resregx;
1815             switch (op)
1816             {   case OPdivass:
1817                     resregx = r3;
1818                     break;
1819 
1820                 case OPmodass:
1821                     assert(reg != AX && r3 == DX);
1822                     if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1823                     {
1824                         cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1825                     }
1826                     else
1827                     {
1828                         movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
1829                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1830                         getregsNoSave(mAX);                             // EAX no longer contains 'd'
1831                     }
1832                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1833                     resregx = reg;
1834                     break;
1835 
1836                 default:
1837                     assert(0);
1838             }
1839 
1840             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1841             return;
1842         }
1843 
1844         // Unsigned divide by a constant
1845         void unsignedDivideByConstant(ref CodeBuilder cdb)
1846         {
1847             assert(sz == 4 || sz == 8);
1848 
1849             reg_t r3;
1850             reg_t reg;
1851             ulong m;
1852             int shpre;
1853             int shpost;
1854             code cs = void;
1855 
1856             if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1857             {
1858                 /* t1 = MULUH(m, n)
1859                  * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1860                  *   MOV   EAX,reg
1861                  *   MOV   EDX,m
1862                  *   MUL   EDX
1863                  *   MOV   EAX,reg
1864                  *   SUB   EAX,EDX
1865                  *   SHR   EAX,1
1866                  *   LEA   R3,[EAX][EDX]
1867                  *   SHR   R3,shpost-1
1868                  */
1869                 assert(shpre == 0);
1870 
1871                 freenode(e2);
1872                 getlvalue(cdb,&cs,e1,mAX | mDX);
1873                 regm_t idxregs = idxregm(&cs);
1874                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1875                 getregs(cdb, mAX|mDX);
1876 
1877                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1878                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m
1879                 getregs(cdb,mask(reg) | mDX | mAX);
1880                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1881                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1882                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1883                 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1884                 regm_t regm3 = allregs & ~idxregs;
1885                 if (op == OPmodass)
1886                 {
1887                     regm3 &= ~mask(reg);
1888                     if (!el_signx32(e2))
1889                         regm3 &= ~mAX;
1890                 }
1891                 allocreg(cdb,&regm3,&r3,TYint);
1892                 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1893                 if (shpost != 1)
1894                     cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1895             }
1896             else
1897             {
1898                 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1899                  *   SHR   EAX,shpre
1900                  *   MOV   reg,m
1901                  *   MUL   reg
1902                  *   SHR   EDX,shpost
1903                  */
1904 
1905                 freenode(e2);
1906                 getlvalue(cdb,&cs,e1,mAX | mDX);
1907                 regm_t idxregs = idxregm(&cs);
1908                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1909                 getregs(cdb, mAX|mDX);
1910 
1911                 if (reg != AX)
1912                 {
1913                     getregs(cdb,mAX);
1914                     genmovreg(cdb,AX,reg);                              // MOV EAX,reg
1915                 }
1916                 if (shpre)
1917                 {
1918                     getregs(cdb,mAX);
1919                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1920                 }
1921                 getregs(cdb,mDX);
1922                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1923                 getregs(cdb,mDX | mAX);
1924                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1925                 if (shpost)
1926                     cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1927                 r3 = DX;
1928             }
1929 
1930             reg_t resregx;
1931             switch (op)
1932             {
1933                 case OPdivass:
1934                     // r3 = quotient
1935                     resregx = r3;
1936                     break;
1937 
1938                 case OPmodass:
1939                     /* reg = original value
1940                      * r3  = quotient
1941                      */
1942                     assert(reg != AX);
1943                     if (el_signx32(e2))
1944                     {
1945                         cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1946                     }
1947                     else
1948                     {
1949                         assert(!(mask(r3) & mAX));
1950                         movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1951                         getregs(cdb,mAX);
1952                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1953                     }
1954                     getregs(cdb,mask(reg));
1955                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1956                     resregx = reg;
1957                     break;
1958 
1959                 default:
1960                     assert(0);
1961             }
1962 
1963             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1964             return;
1965         }
1966 
1967         if (config.flags4 & CFG4speed &&
1968             e2.Eoper == OPconst &&
1969             uns &&
1970             e2factor > 2 && (e2factor & (e2factor - 1)) &&
1971             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1972         {
1973             unsignedDivideByConstant(cdb);
1974             return;
1975         }
1976 
1977         if (config.flags4 & CFG4speed &&
1978             e2.Eoper == OPconst && !uns &&
1979             (sz == REGSIZE || (I64 && sz == 4)) &&
1980             pow2 != -1 &&
1981             e2factor == cast(int)e2factor &&
1982             !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
1983            )
1984         {
1985             freenode(e2);
1986             if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
1987             {
1988                 /* This is better than the code further down because it is
1989                  * not constrained to using AX and DX.
1990                  */
1991                 getlvalue(cdb,&cs,e1,0);
1992                 regm_t idxregs = idxregm(&cs);
1993                 reg_t reg;
1994                 opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA
1995 
1996                 reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
1997                 genmovreg(cdb,r,reg);                        // MOV r,reg
1998                 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1999                 cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
2000                 cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
2001 
2002                 opAssStoreReg(cdb, cs, e, reg, pretregs);
2003                 return;
2004             }
2005 
2006             // Signed divide or modulo by power of 2
2007             getlvalue(cdb,&cs,e1,mAX | mDX);
2008             reg_t reg;
2009             opAssLoadReg(cdb,cs,e,reg,mAX);
2010 
2011             getregs(cdb,mDX);                   // DX is scratch register
2012             cdb.gen1(0x99);                     // CWD
2013             code_orrex(cdb.last(), rex);
2014             if (pow2 == 1)
2015             {
2016                 if (op == OPdivass)
2017                 {
2018                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2019                     cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
2020                     resreg = AX;
2021                 }
2022                 else // OPmod
2023                 {
2024                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2025                     cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
2026                     cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
2027                     resreg = DX;
2028                 }
2029             }
2030             else
2031             {
2032                 assert(pow2 < 32);
2033                 targ_ulong m = (1 << pow2) - 1;
2034                 if (op == OPdivass)
2035                 {
2036                     cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
2037                     cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
2038                     // Be careful not to generate this for 8088
2039                     assert(config.target_cpu >= TARGET_80286);
2040                     cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
2041                     resreg = AX;
2042                 }
2043                 else // OPmodass
2044                 {
2045                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2046                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2047                     cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
2048                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2049                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2050                     resreg = AX;
2051                 }
2052             }
2053         }
2054         else
2055         {
2056             regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
2057             codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
2058             reg_t reg = findreg(retregs);
2059             getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
2060             getregs(cdb,mAX | mDX);         // destroy these regs
2061             cs.Irm |= modregrm(0,AX,0);
2062             cs.Iop = LOD;
2063             cdb.gen(&cs);                   // MOV AX,EA
2064             if (uns)                        // if uint
2065                 movregconst(cdb,DX,0,0);    // CLR DX
2066             else                            // else signed
2067             {
2068                 cdb.gen1(0x99);             // CWD
2069                 code_orrex(cdb.last(),rex);
2070             }
2071             getregs(cdb,mDX | mAX); // DX and AX will be destroyed
2072             const uint opr = uns ? 6 : 7;     // DIV/IDIV
2073             genregs(cdb,0xF7,opr,reg);   // OPR reg
2074             code_orrex(cdb.last(),rex);
2075             resreg = (op == OPmodass) ? DX : AX;        // result register
2076         }
2077         opAssStoreReg(cdb, cs, e, resreg, pretregs);
2078         return;
2079     }
2080 
2081     assert(sz == 2 * REGSIZE);
2082 
2083     targ_size_t e2factor;
2084     int pow2;
2085     if (e2.Eoper == OPconst)
2086     {
2087         e2factor = cast(targ_size_t)el_tolong(e2);
2088         pow2 = ispow2(e2factor);
2089     }
2090 
2091     // Register pair signed divide by power of 2
2092     if (op == OPdivass &&
2093         !uns &&
2094         e.Eoper == OPconst &&
2095         pow2 != -1 &&
2096         I32 // not set up for I16 or I64 cent
2097        )
2098     {
2099         freenode(e2);
2100         regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
2101         reg_t rhi, rlo;
2102         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2103         const regm_t keepmsk = idxregm(&cs);
2104         retregs = mask(rhi) | mask(rlo);
2105 
2106         if (pow2 < 32)
2107         {
2108             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2109 
2110             genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
2111             if (pow2 == 1)
2112                 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
2113             else
2114             {
2115                 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
2116                 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
2117             }
2118             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
2119             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
2120             cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
2121             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
2122         }
2123         else if (pow2 == 32)
2124         {
2125             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2126 
2127             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2128             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2129             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
2130             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
2131             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2132             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2133         }
2134         else if (pow2 < 63)
2135         {
2136             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2137             reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));
2138 
2139             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2140             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2141             cdb.genmovreg(r2,r1);                                         // MOV r2,r1
2142 
2143             if (pow2 == 33)
2144             {
2145                 cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
2146                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
2147                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
2148             }
2149             else
2150             {
2151                 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
2152                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
2153                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
2154             }
2155 
2156             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2157             cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
2158             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2159         }
2160         else
2161         {
2162             // This may be better done by cgelem.d
2163             assert(pow2 == 63);
2164             assert(mask(rlo) & BYTEREGS);                          // for SETZ
2165             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
2166             cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
2167             cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
2168             cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
2169             movregconst(cdb,rhi,0,0);                              // MOV rhi,0
2170         }
2171 
2172         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2173         return;
2174     }
2175 
2176     // Register pair signed modulo by power of 2
2177     if (op == OPmodass &&
2178         !uns &&
2179         e.Eoper == OPconst &&
2180         pow2 != -1 &&
2181         I32 // not set up for I64 cent yet
2182        )
2183     {
2184         freenode(e2);
2185         regm_t retregs = mDX|mAX;
2186         reg_t rhi, rlo;
2187         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2188         const regm_t keepmsk = idxregm(&cs);
2189 
2190         regm_t scratchm = allregs & ~(retregs | keepmsk);
2191         if (pow2 == 63)
2192             scratchm &= BYTEREGS;               // because of SETZ
2193         reg_t r1 = allocScratchReg(cdb, scratchm);
2194 
2195         if (pow2 < 32)
2196         {
2197             cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
2198             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
2199             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2200             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2201             cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
2202             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2203             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2204             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
2205         }
2206         else if (pow2 == 32)
2207         {
2208             cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
2209             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
2210             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
2211             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
2212             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
2213         }
2214         else if (pow2 < 63)
2215         {
2216             scratchm = allregs & ~(retregs | scratchm);
2217             reg_t r2;
2218             allocreg(cdb,&scratchm,&r2,TYint);
2219 
2220             cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
2221             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
2222             cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
2223             cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
2224             cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
2225             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
2226             cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
2227             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
2228             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
2229             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
2230         }
2231         else
2232         {
2233             // This may be better done by cgelem.d
2234             assert(pow2 == 63);
2235 
2236             cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
2237             cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
2238             cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
2239             cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
2240             cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
2241         }
2242 
2243         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2244         return;
2245     }
2246 
2247     regm_t rretregs = mCX|mBX;
2248     codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX
2249 
2250     reg_t rlo;
2251     reg_t rhi;
2252     opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);
2253 
2254     regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
2255     uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
2256     if (op == OPmodass)
2257         ++lib;
2258     callclib(cdb,e,lib,&retregs,idxregm(&cs));
2259 
2260     opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
2261 }
2262 
2263 
2264 /********************************
2265  * Generate code for <<= and >>=
2266  */
2267 
2268 @trusted
2269 void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2270 {
2271     code cs;
2272     uint op1,op2;
2273 
2274     elem *e1 = e.EV.E1;
2275     elem *e2 = e.EV.E2;
2276 
2277     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
2278     uint sz = _tysize[tyml];
2279     uint isbyte = tybyte(e.Ety) != 0;        // 1 for byte operations
2280     tym_t tym = tybasic(e.Ety);                // type of result
2281     OPER oper = e.Eoper;
2282     assert(tysize(e2.Ety) <= REGSIZE);
2283 
2284     uint rex = (I64 && sz == 8) ? REX_W : 0;
2285 
2286     // if our lvalue is a cse, make sure we evaluate for result in register
2287     regm_t retregs;
2288     reg_t reg;
2289     if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,retregs,reg))
2290         *pretregs |= ALLREGS;
2291 
2292     // Select opcodes. op2 is used for msw for long shifts.
2293 
2294     switch (oper)
2295     {
2296         case OPshlass:
2297             op1 = 4;                    // SHL
2298             op2 = 2;                    // RCL
2299             break;
2300 
2301         case OPshrass:
2302             op1 = 5;                    // SHR
2303             op2 = 3;                    // RCR
2304             break;
2305 
2306         case OPashrass:
2307             op1 = 7;                    // SAR
2308             op2 = 3;                    // RCR
2309             break;
2310 
2311         default:
2312             assert(0);
2313     }
2314 
2315 
2316     uint v = 0xD3;                  // for SHIFT xx,CL cases
2317     uint loopcnt = 1;
2318     uint conste2 = false;
2319     uint shiftcnt = 0;              // avoid "use before initialized" warnings
2320     if (e2.Eoper == OPconst)
2321     {
2322         conste2 = true;                 // e2 is a constant
2323         shiftcnt = e2.EV.Vint;         // byte ordering of host
2324         if (config.target_cpu >= TARGET_80286 &&
2325             sz <= REGSIZE &&
2326             shiftcnt != 1)
2327             v = 0xC1;                   // SHIFT xx,shiftcnt
2328         else if (shiftcnt <= 3)
2329         {
2330             loopcnt = shiftcnt;
2331             v = 0xD1;                   // SHIFT xx,1
2332         }
2333     }
2334 
2335     if (v == 0xD3)                        // if COUNT == CL
2336     {
2337         retregs = mCX;
2338         codelem(cdb,e2,&retregs,false);
2339     }
2340     else
2341         freenode(e2);
2342     getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
2343     modEA(cdb,&cs);             // check for modifying register
2344 
2345     if (*pretregs == 0 ||               // if don't return result
2346         (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
2347         sz > REGSIZE
2348        )
2349     {
2350         retregs = 0;            // value not returned in a register
2351         cs.Iop = v ^ isbyte;
2352         while (loopcnt--)
2353         {
2354             NEWREG(cs.Irm,op1);           // make sure op1 is first
2355             if (sz <= REGSIZE)
2356             {
2357                 if (conste2)
2358                 {
2359                     cs.IFL2 = FLconst;
2360                     cs.IEV2.Vint = shiftcnt;
2361                 }
2362                 cdb.gen(&cs);             // SHIFT EA,[CL|1]
2363                 if (*pretregs & mPSW && !loopcnt && conste2)
2364                   code_orflag(cdb.last(),CFpsw);
2365             }
2366             else // TYlong
2367             {
2368                 cs.Iop = 0xD1;            // plain shift
2369                 code *ce = gennop(null);                  // ce: NOP
2370                 if (v == 0xD3)
2371                 {
2372                     getregs(cdb,mCX);
2373                     if (!conste2)
2374                     {
2375                         assert(loopcnt == 0);
2376                         genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
2377                     }
2378                 }
2379                 code *cg;
2380                 if (oper == OPshlass)
2381                 {
2382                     cdb.gen(&cs);               // cg: SHIFT EA
2383                     cg = cdb.last();
2384                     code_orflag(cg,CFpsw);
2385                     getlvalue_msw(&cs);
2386                     NEWREG(cs.Irm,op2);
2387                     cdb.gen(&cs);               // SHIFT EA
2388                     getlvalue_lsw(&cs);
2389                 }
2390                 else
2391                 {
2392                     getlvalue_msw(&cs);
2393                     cdb.gen(&cs);
2394                     cg = cdb.last();
2395                     code_orflag(cg,CFpsw);
2396                     NEWREG(cs.Irm,op2);
2397                     getlvalue_lsw(&cs);
2398                     cdb.gen(&cs);
2399                 }
2400                 if (v == 0xD3)                    // if building a loop
2401                 {
2402                     genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
2403                     regimmed_set(CX,0);           // note that now CX == 0
2404                 }
2405                 cdb.append(ce);
2406             }
2407         }
2408 
2409         // If we want the result, we must load it from the EA
2410         // into a register.
2411 
2412         if (sz == 2 * REGSIZE && *pretregs)
2413         {
2414             retregs = *pretregs & (ALLREGS | mBP);
2415             if (retregs)
2416             {
2417                 retregs &= ~idxregm(&cs);
2418                 allocreg(cdb,&retregs,&reg,tym);
2419                 cs.Iop = LOD;
2420 
2421                 // be careful not to trash any index regs
2422                 // do MSW first (which can't be an index reg)
2423                 getlvalue_msw(&cs);
2424                 NEWREG(cs.Irm,reg);
2425                 cdb.gen(&cs);
2426                 getlvalue_lsw(&cs);
2427                 reg = findreglsw(retregs);
2428                 NEWREG(cs.Irm,reg);
2429                 cdb.gen(&cs);
2430                 if (*pretregs & mPSW)
2431                     tstresult(cdb,retregs,tyml,true);
2432             }
2433             else        // flags only
2434             {
2435                 retregs = ALLREGS & ~idxregm(&cs);
2436                 allocreg(cdb,&retregs,&reg,TYint);
2437                 cs.Iop = LOD;
2438                 NEWREG(cs.Irm,reg);
2439                 cdb.gen(&cs);           // MOV reg,EA
2440                 cs.Iop = 0x0B;          // OR reg,EA+2
2441                 cs.Iflags |= CFpsw;
2442                 getlvalue_msw(&cs);
2443                 cdb.gen(&cs);
2444             }
2445         }
2446         if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
2447             cssave(e1,retregs,!OTleaf(e1.Eoper));
2448         freenode(e1);
2449         *pretregs = retregs;
2450         return;
2451     }
2452     else                                // else must evaluate in register
2453     {
2454         if (sz <= REGSIZE)
2455         {
2456             regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
2457             if (isbyte)
2458                 possregs &= BYTEREGS;
2459             retregs = *pretregs & possregs;
2460             if (retregs == 0)
2461                 retregs = possregs;
2462             allocreg(cdb,&retregs,&reg,tym);
2463             cs.Iop = LOD ^ isbyte;
2464             code_newreg(&cs, reg);
2465             if (isbyte && I64 && (reg >= 4))
2466                 cs.Irex |= REX;
2467             cdb.gen(&cs);                     // MOV reg,EA
2468             if (!I16)
2469             {
2470                 assert(!isbyte || (mask(reg) & BYTEREGS));
2471                 cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
2472                 if (isbyte && I64 && (reg >= 4))
2473                     cdb.last().Irex |= REX;
2474                 code_orrex(cdb.last(), rex);
2475                 // We can do a 32 bit shift on a 16 bit operand if
2476                 // it's a left shift and we're not concerned about
2477                 // the flags. Remember that flags are not set if
2478                 // a shift of 0 occurs.
2479                 if (_tysize[tym] == SHORTSIZE &&
2480                     (oper == OPshrass || oper == OPashrass ||
2481                      (*pretregs & mPSW && conste2)))
2482                      cdb.last().Iflags |= CFopsize;            // 16 bit operand
2483             }
2484             else
2485             {
2486                 while (loopcnt--)
2487                 {   // Generate shift instructions.
2488                     cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
2489                 }
2490             }
2491             if (*pretregs & mPSW && conste2)
2492             {
2493                 assert(shiftcnt);
2494                 *pretregs &= ~mPSW;     // result is already in flags
2495                 code_orflag(cdb.last(),CFpsw);
2496             }
2497 
2498             opAssStoreReg(cdb,cs,e,reg,pretregs);
2499             return;
2500         }
2501         assert(0);
2502     }
2503 }
2504 
2505 
2506 /**********************************
2507  * Generate code for compares.
2508  * Handles lt,gt,le,ge,eqeq,ne for all data types.
2509  */
2510 
2511 @trusted
2512 void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2513 {
2514     regm_t retregs,rretregs;
2515     reg_t reg,rreg;
2516     int fl;
2517 
2518     //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
2519     // Collect extra parameter. This is pretty ugly...
2520     int flag = cdcmp_flag;
2521     cdcmp_flag = 0;
2522 
2523     elem *e1 = e.EV.E1;
2524     elem *e2 = e.EV.E2;
2525     if (*pretregs == 0)                 // if don't want result
2526     {
2527         codelem(cdb,e1,pretregs,false);
2528         *pretregs = 0;                  // in case e1 changed it
2529         codelem(cdb,e2,pretregs,false);
2530         return;
2531     }
2532 
2533     if (tyvector(tybasic(e1.Ety)))
2534         return orthxmm(cdb,e,pretregs);
2535 
2536     uint jop = jmpopcode(e);        // must be computed before
2537                                         // leaves are free'd
2538     uint reverse = 0;
2539 
2540     OPER op = e.Eoper;
2541     assert(OTrel(op));
2542     bool eqorne = (op == OPeqeq) || (op == OPne);
2543 
2544     tym_t tym = tybasic(e1.Ety);
2545     uint sz = _tysize[tym];
2546     uint isbyte = sz == 1;
2547 
2548     uint rex = (I64 && sz == 8) ? REX_W : 0;
2549     uint grex = rex << 16;          // 64 bit operands
2550 
2551     code cs;
2552     code *ce;
2553     if (tyfloating(tym))                  // if floating operation
2554     {
2555         if (config.fpxmmregs)
2556         {
2557             retregs = mPSW;
2558             if (tyxmmreg(tym))
2559                 orthxmm(cdb,e,&retregs);
2560             else
2561                 orth87(cdb,e,&retregs);
2562         }
2563         else if (config.inline8087)
2564         {   retregs = mPSW;
2565             orth87(cdb,e,&retregs);
2566         }
2567         else
2568         {
2569             if (config.exe & EX_windos)
2570             {
2571                 int clib;
2572 
2573                 retregs = 0;                /* skip result for now          */
2574                 if (iffalse(e2))            /* second operand is constant 0 */
2575                 {
2576                     assert(!eqorne);        /* should be OPbool or OPnot    */
2577                     if (tym == TYfloat)
2578                     {
2579                         retregs = FLOATREGS;
2580                         clib = CLIB.ftst0;
2581                     }
2582                     else
2583                     {
2584                         retregs = DOUBLEREGS;
2585                         clib = CLIB.dtst0;
2586                     }
2587                     if (rel_exception(op))
2588                         clib += CLIB.dtst0exc - CLIB.dtst0;
2589                     codelem(cdb,e1,&retregs,false);
2590                     retregs = 0;
2591                     callclib(cdb,e,clib,&retregs,0);
2592                     freenode(e2);
2593                 }
2594                 else
2595                 {
2596                     clib = CLIB.dcmp;
2597                     if (rel_exception(op))
2598                         clib += CLIB.dcmpexc - CLIB.dcmp;
2599                     opdouble(cdb,e,&retregs,clib);
2600                 }
2601             }
2602             else
2603             {
2604                 assert(0);
2605             }
2606         }
2607         goto L3;
2608     }
2609 
2610     /* If it's a signed comparison of longs, we have to call a library    */
2611     /* routine, because we don't know the target of the signed branch     */
2612     /* (have to set up flags so that jmpopcode() will do it right)        */
2613     if (!eqorne &&
2614         (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
2615          I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
2616        )
2617     {
2618         assert(jop != JC && jop != JNC);
2619         retregs = mDX | mAX;
2620         codelem(cdb,e1,&retregs,false);
2621         retregs = mCX | mBX;
2622         scodelem(cdb,e2,&retregs,mDX | mAX,false);
2623 
2624         if (I16)
2625         {
2626             retregs = 0;
2627             callclib(cdb,e,CLIB.lcmp,&retregs,0);    // gross, but it works
2628         }
2629         else
2630         {
2631             /* Generate:
2632              *      CMP  EDX,ECX
2633              *      JNE  C1
2634              *      XOR  EDX,EDX
2635              *      CMP  EAX,EBX
2636              *      JZ   C1
2637              *      JA   C3
2638              *      DEC  EDX
2639              *      JMP  C1
2640              * C3:  INC  EDX
2641              * C1:
2642              */
2643              getregs(cdb,mDX);
2644              genregs(cdb,0x39,CX,DX);             // CMP EDX,ECX
2645              code *c1 = gennop(null);
2646              genjmp(cdb,JNE,FLcode,cast(block *)c1);  // JNE C1
2647              movregconst(cdb,DX,0,0);             // XOR EDX,EDX
2648              genregs(cdb,0x39,BX,AX);             // CMP EAX,EBX
2649              genjmp(cdb,JE,FLcode,cast(block *)c1);   // JZ C1
2650              code *c3 = gen1(null,0x40 + DX);                  // INC EDX
2651              genjmp(cdb,JA,FLcode,cast(block *)c3);   // JA C3
2652              cdb.gen1(0x48 + DX);                              // DEC EDX
2653              genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1
2654              cdb.append(c3);
2655              cdb.append(c1);
2656              getregs(cdb,mDX);
2657              retregs = mPSW;
2658         }
2659         goto L3;
2660     }
2661 
2662     /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
2663      * (This is already reflected in the jop)
2664      */
2665     if ((jop == JC || jop == JNC) &&
2666         (op == OPgt || op == OPle) &&
2667         (tyuns(tym) || tyuns(e2.Ety))
2668        )
2669     {   // jmpopcode() sez comparison should be reversed
2670         assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
2671         reverse ^= 2;
2672     }
2673 
2674     /* See if we should swap operands     */
2675     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
2676     {
2677         e1 = e.EV.E2;
2678         e2 = e.EV.E1;
2679         reverse ^= 2;
2680     }
2681 
2682     retregs = allregs;
2683     if (isbyte)
2684         retregs = BYTEREGS;
2685 
2686     ce = null;
2687     cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
2688     cs.Irex = cast(ubyte)rex;
2689     if (sz > REGSIZE)
2690         ce = gennop(ce);
2691 
2692     switch (e2.Eoper)
2693     {
2694         default:
2695         L2:
2696             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2697             rretregs = allregs & ~retregs;
2698             if (isbyte)
2699                 rretregs &= BYTEREGS;
2700             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
2701             if (sz <= REGSIZE)                              // CMP reg,rreg
2702             {
2703                 reg = findreg(retregs);             // get reg that e1 is in
2704                 rreg = findreg(rretregs);
2705                 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
2706                 code_orrex(cdb.last(), rex);
2707                 if (!I16 && sz == SHORTSIZE)
2708                     cdb.last().Iflags |= CFopsize;          // compare only 16 bits
2709                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
2710                     cdb.last().Irex |= REX;                 // address byte registers
2711             }
2712             else
2713             {
2714                 assert(sz <= 2 * REGSIZE);
2715 
2716                 // Compare MSW, if they're equal then compare the LSW
2717                 reg = findregmsw(retregs);
2718                 rreg = findregmsw(rretregs);
2719                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2720                 if (I32 && sz == 6)
2721                     cdb.last().Iflags |= CFopsize;         // seg is only 16 bits
2722                 else if (I64)
2723                     code_orrex(cdb.last(), REX_W);
2724                 genjmp(cdb,JNE,FLcode,cast(block *) ce);   // JNE nop
2725 
2726                 reg = findreglsw(retregs);
2727                 rreg = findreglsw(rretregs);
2728                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2729                 if (I64)
2730                     code_orrex(cdb.last(), REX_W);
2731             }
2732             break;
2733 
2734         case OPrelconst:
2735             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
2736                 goto L2;
2737             fl = el_fl(e2);
2738             switch (fl)
2739             {
2740                 case FLfunc:
2741                     fl = FLextern;          // so it won't be self-relative
2742                     break;
2743 
2744                 case FLdata:
2745                 case FLudata:
2746                 case FLextern:
2747                     if (sz > REGSIZE)       // compare against DS, not DGROUP
2748                         goto L2;
2749                     break;
2750 
2751                 case FLfardata:
2752                     break;
2753 
2754                 default:
2755                     goto L2;
2756             }
2757             cs.IFL2 = cast(ubyte)fl;
2758             cs.IEV2.Vsym = e2.EV.Vsym;
2759             if (sz > REGSIZE)
2760             {
2761                 cs.Iflags |= CFseg;
2762                 cs.IEV2.Voffset = 0;
2763             }
2764             else
2765             {
2766                 cs.Iflags |= CFoff;
2767                 cs.IEV2.Voffset = e2.EV.Voffset;
2768             }
2769             goto L4;
2770 
2771         case OPconst:
2772             // If compare against 0
2773             {
2774                 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
2775                     isregvar(e1,retregs,reg)
2776                    )
2777                 {   // Just do a TEST instruction
2778                     genregs(cdb,0x85 ^ isbyte,reg,reg);      // TEST reg2,reg2
2779                     cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
2780                     code_orrex(cdb.last(), rex);
2781                     if (I64 && isbyte && reg >= 4)
2782                         cdb.last().Irex |= REX;                 // address byte registers
2783                     retregs = mPSW;
2784                     break;
2785                 }
2786             }
2787 
2788             if (!tyuns(tym) && !tyuns(e2.Ety) &&
2789                 !boolres(e2) && !(*pretregs & mPSW) &&
2790                 (sz == REGSIZE || (I64 && sz == 4)) &&
2791                 (!I16 || op == OPlt || op == OPge))
2792             {
2793                 assert(*pretregs & (allregs));
2794                 codelem(cdb,e1,pretregs,false);
2795                 reg = findreg(*pretregs);
2796                 getregs(cdb,mask(reg));
2797                 switch (op)
2798                 {
2799                     case OPle:
2800                         cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
2801                         code_orflag(cdb.last(), CFpsw);
2802                         cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);          // ADC reg,0
2803                         goto oplt;
2804 
2805                     case OPgt:
2806                         cdb.gen2(0xF7,grex | modregrmx(3,3,reg));         // NEG reg
2807                             /* Flips the sign bit unless the value is 0 or int.min.
2808                             Also sets the carry bit when the value is not 0. */
2809                         code_orflag(cdb.last(), CFpsw);
2810                         cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);  // SBB reg,0
2811                             /* Subtracts the carry bit. This turns int.min into
2812                             int.max, flipping the sign bit.
2813                             For other negative and positive values, subtracting 1
2814                             doesn't affect the sign bit.
2815                             For 0, the carry bit is not set, so this does nothing
2816                             and the sign bit is not affected. */
2817                         goto oplt;
2818 
2819                     case OPlt:
2820                     oplt:
2821                         // Get the sign bit, i.e. 1 if the value is negative.
2822                         if (!I16)
2823                             cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
2824                         else
2825                         {   /* 8088-286 do not have a barrel shifter, so use this
2826                                faster sequence
2827                              */
2828                             genregs(cdb,0xD1,0,reg);   // ROL reg,1
2829                             reg_t regi;
2830                             if (reghasvalue(allregs,1,regi))
2831                                 genregs(cdb,0x23,reg,regi);  // AND reg,regi
2832                             else
2833                                 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
2834                         }
2835                         break;
2836 
2837                     case OPge:
2838                         genregs(cdb,0xD1,4,reg);        // SHL reg,1
2839                         code_orrex(cdb.last(),rex);
2840                         code_orflag(cdb.last(), CFpsw);
2841                         genregs(cdb,0x19,reg,reg);      // SBB reg,reg
2842                         code_orrex(cdb.last(),rex);
2843                         if (I64)
2844                         {
2845                             cdb.gen2(0xFF,modregrmx(3,0,reg));       // INC reg
2846                             code_orrex(cdb.last(), rex);
2847                         }
2848                         else
2849                             cdb.gen1(0x40 + reg);                    // INC reg
2850                         break;
2851 
2852                     default:
2853                         assert(0);
2854                 }
2855                 freenode(e2);
2856                 goto ret;
2857             }
2858 
2859             cs.IFL2 = FLconst;
2860             if (sz == 16)
2861                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.hi;
2862             else if (sz > REGSIZE)
2863                 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
2864             else
2865                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;
2866 
2867             // The cmp immediate relies on sign extension of the 32 bit immediate value
2868             if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
2869                 goto L2;
2870           L4:
2871             cs.Iop = 0x81 ^ isbyte;
2872 
2873             /* if ((e1 is data or a '*' reference) and it's not a
2874              * common subexpression
2875              */
2876 
2877             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
2878                  e1.Eoper == OPind) &&
2879                 !evalinregister(e1))
2880             {
2881                 getlvalue(cdb,&cs,e1,RMload);
2882                 freenode(e1);
2883                 if (evalinregister(e2))
2884                 {
2885                     retregs = idxregm(&cs);
2886                     if ((cs.Iflags & CFSEG) == CFes)
2887                         retregs |= mES;             // take no chances
2888                     rretregs = allregs & ~retregs;
2889                     if (isbyte)
2890                         rretregs &= BYTEREGS;
2891                     scodelem(cdb,e2,&rretregs,retregs,true);
2892                     cs.Iop = 0x39 ^ isbyte ^ reverse;
2893                     if (sz > REGSIZE)
2894                     {
2895                         rreg = findregmsw(rretregs);
2896                         cs.Irm |= modregrm(0,rreg,0);
2897                         getlvalue_msw(&cs);
2898                         cdb.gen(&cs);              // CMP EA+2,rreg
2899                         if (I32 && sz == 6)
2900                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2901                         if (I64 && isbyte && rreg >= 4)
2902                             cdb.last().Irex |= REX;
2903                         genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
2904                         rreg = findreglsw(rretregs);
2905                         NEWREG(cs.Irm,rreg);
2906                         getlvalue_lsw(&cs);
2907                     }
2908                     else
2909                     {
2910                         rreg = findreg(rretregs);
2911                         code_newreg(&cs, rreg);
2912                         if (I64 && isbyte && rreg >= 4)
2913                             cs.Irex |= REX;
2914                     }
2915                 }
2916                 else
2917                 {
2918                     cs.Irm |= modregrm(0,7,0);
2919                     if (sz > REGSIZE)
2920                     {
2921                         if (sz == 6)
2922                             assert(0);
2923                         if (e2.Eoper == OPrelconst)
2924                         {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
2925                             cs.IEV2.Voffset = 0;
2926                         }
2927                         getlvalue_msw(&cs);
2928                         cdb.gen(&cs);              // CMP EA+2,const
2929                         if (!I16 && sz == 6)
2930                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2931                         genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
2932                         if (e2.Eoper == OPconst)
2933                             cs.IEV2.Vint = cast(int)e2.EV.Vllong;
2934                         else if (e2.Eoper == OPrelconst)
2935                         {   // Turn off CFseg, on CFoff
2936                             cs.Iflags ^= CFseg | CFoff;
2937                             cs.IEV2.Voffset = e2.EV.Voffset;
2938                         }
2939                         else
2940                             assert(0);
2941                         getlvalue_lsw(&cs);
2942                     }
2943                     freenode(e2);
2944                 }
2945                 cdb.gen(&cs);
2946                 break;
2947             }
2948 
2949             regm_t regmx;
2950             reg_t regx;
2951             if (evalinregister(e2) && !OTassign(e1.Eoper) &&
2952                 !isregvar(e1,regmx,regx))
2953             {
2954                 regm_t m;
2955 
2956                 m = allregs & ~regcon.mvar;
2957                 if (isbyte)
2958                     m &= BYTEREGS;
2959                 if (m & (m - 1))    // if more than one free register
2960                     goto L2;
2961             }
2962             if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
2963                 !boolres(e2) && !evalinregister(e1))
2964             {
2965                 retregs = mPSW;
2966                 scodelem(cdb,e1,&retregs,0,false);
2967                 freenode(e2);
2968                 break;
2969             }
2970             if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
2971             {
2972                 retregs |= mPSW;
2973                 scodelem(cdb,e1,&retregs,0,false);
2974                 freenode(e2);
2975                 break;
2976             }
2977             scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
2978             if (sz == 1)
2979             {
2980                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2981                 cs.Irm = modregrm(3,7,reg & 7);
2982                 if (reg & 8)
2983                     cs.Irex |= REX_B;
2984                 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
2985                 {   assert(reg < 4);
2986                     cs.Irm |= 4;                    // use upper register half
2987                 }
2988                 if (I64 && reg >= 4)
2989                     cs.Irex |= REX;                 // address byte registers
2990             }
2991             else if (sz <= REGSIZE)
2992             {   // CMP reg,const
2993                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2994                 rretregs = allregs & ~retregs;
2995                 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,rreg))
2996                 {
2997                     genregs(cdb,0x3B,reg,rreg);
2998                     code_orrex(cdb.last(), rex);
2999                     if (!I16)
3000                         cdb.last().Iflags |= cs.Iflags & CFopsize;
3001                     freenode(e2);
3002                     break;
3003                 }
3004                 cs.Irm = modregrm(3,7,reg & 7);
3005                 if (reg & 8)
3006                     cs.Irex |= REX_B;
3007             }
3008             else if (sz <= 2 * REGSIZE)
3009             {
3010                 reg = findregmsw(retregs);          // get reg that e1 is in
3011                 cs.Irm = modregrm(3,7,reg);
3012                 cdb.gen(&cs);                       // CMP reg,MSW
3013                 if (I32 && sz == 6)
3014                     cdb.last().Iflags |= CFopsize;  // seg is only 16 bits
3015                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3016 
3017                 reg = findreglsw(retregs);
3018                 cs.Irm = modregrm(3,7,reg);
3019                 if (e2.Eoper == OPconst)
3020                     cs.IEV2.Vint = e2.EV.Vlong;
3021                 else if (e2.Eoper == OPrelconst)
3022                 {   // Turn off CFseg, on CFoff
3023                     cs.Iflags ^= CFseg | CFoff;
3024                     cs.IEV2.Voffset = e2.EV.Voffset;
3025                 }
3026                 else
3027                     assert(0);
3028             }
3029             else
3030                 assert(0);
3031             cdb.gen(&cs);                         // CMP sucreg,LSW
3032             freenode(e2);
3033             break;
3034 
3035         case OPind:
3036             if (e2.Ecount)
3037                 goto L2;
3038             goto L5;
3039 
3040         case OPvar:
3041             if (config.exe & (EX_OSX | EX_OSX64))
3042             {
3043                 if (movOnly(e2))
3044                     goto L2;
3045             }
3046             if ((e1.Eoper == OPvar &&
3047                  isregvar(e2,rretregs,reg) &&
3048                  sz <= REGSIZE
3049                 ) ||
3050                 (e1.Eoper == OPind &&
3051                  isregvar(e2,rretregs,reg) &&
3052                  !evalinregister(e1) &&
3053                  sz <= REGSIZE
3054                 )
3055                )
3056             {
3057                 // CMP EA,e2
3058                 getlvalue(cdb,&cs,e1,RMload);
3059                 freenode(e1);
3060                 cs.Iop = 0x39 ^ isbyte ^ reverse;
3061                 code_newreg(&cs,reg);
3062                 if (I64 && isbyte && reg >= 4)
3063                     cs.Irex |= REX;                 // address byte registers
3064                 cdb.gen(&cs);
3065                 freenode(e2);
3066                 break;
3067             }
3068           L5:
3069             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3070             if (sz <= REGSIZE)                      // CMP reg,EA
3071             {
3072                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3073                 uint opsize = cs.Iflags & CFopsize;
3074                 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
3075                 code_orflag(cdb.last(),opsize);
3076             }
3077             else if (sz <= 2 * REGSIZE)
3078             {
3079                 reg = findregmsw(retregs);   // get reg that e1 is in
3080                 // CMP reg,EA
3081                 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
3082                 if (I32 && sz == 6)
3083                     cdb.last().Iflags |= CFopsize;        // seg is only 16 bits
3084                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3085                 reg = findreglsw(retregs);
3086                 if (e2.Eoper == OPind)
3087                 {
3088                     NEWREG(cs.Irm,reg);
3089                     getlvalue_lsw(&cs);
3090                     cdb.gen(&cs);
3091                 }
3092                 else
3093                     loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
3094             }
3095             else
3096                 assert(0);
3097             freenode(e2);
3098             break;
3099     }
3100     cdb.append(ce);
3101 
3102 L3:
3103     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
3104     {
3105         if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))
3106         {
3107             regm_t resregs = retregs;
3108             if (!I64)
3109             {
3110                 resregs &= BYTEREGS;
3111                 if (!resregs)
3112                     resregs = BYTEREGS;
3113             }
3114             allocreg(cdb,&resregs,&reg,TYint);
3115             cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
3116             if (I64 && reg >= 4)
3117                 code_orrex(cdb.last(),REX);
3118             if (tysize(e.Ety) > 1)
3119             {
3120                 genregs(cdb,MOVZXb,reg,reg);       // MOVZX reg,reg
3121                 if (I64 && sz == 8)
3122                     code_orrex(cdb.last(),REX_W);
3123                 if (I64 && reg >= 4)
3124                     code_orrex(cdb.last(),REX);
3125             }
3126             *pretregs &= ~mPSW;
3127             fixresult(cdb,e,resregs,pretregs);
3128         }
3129         else
3130         {
3131             code *nop = null;
3132             regm_t save = regcon.immed.mval;
3133             allocreg(cdb,&retregs,&reg,TYint);
3134             regcon.immed.mval = save;
3135             if ((*pretregs & mPSW) == 0 &&
3136                 (jop == JC || jop == JNC))
3137             {
3138                 getregs(cdb,retregs);
3139                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
3140                 if (rex || flag & REX_W)
3141                     code_orrex(cdb.last(), REX_W);
3142                 if (flag)
3143                 { }                                         // cdcond() will handle it
3144                 else if (jop == JNC)
3145                 {
3146                     if (I64)
3147                     {
3148                         cdb.gen2(0xFF,modregrmx(3,0,reg));  // INC reg
3149                         code_orrex(cdb.last(), rex);
3150                     }
3151                     else
3152                         cdb.gen1(0x40 + reg);               // INC reg
3153                 }
3154                 else
3155                 {
3156                     cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
3157                     code_orrex(cdb.last(), rex);
3158                 }
3159             }
3160             else if (I64 && sz == 8)
3161             {
3162                 assert(!flag);
3163                 movregconst(cdb,reg,1,64|8);   // MOV reg,1
3164                 nop = gennop(nop);
3165                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3166                                                             // MOV reg,0
3167                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
3168                 regcon.immed.mval &= ~mask(reg);
3169             }
3170             else
3171             {
3172                 assert(!flag);
3173                 movregconst(cdb,reg,1,8);      // MOV reg,1
3174                 nop = gennop(nop);
3175                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3176                                                             // MOV reg,0
3177                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
3178                 regcon.immed.mval &= ~mask(reg);
3179             }
3180             *pretregs = retregs;
3181             cdb.append(nop);
3182         }
3183     }
3184 ret:
3185     { }
3186 }
3187 
3188 
/**********************************
 * Generate code for signed compare of longs.
 *
 * The value is handled as two words. The most significant words (MSW) are
 * compared first using signed jumps; only when they are equal does control
 * reach the compare of the least significant words (LSW), which uses
 * unsigned jumps.
 *
 * Shape of the emitted code, common to all operand forms below:
 *      CMP   msw(e1),msw(e2)
 *      Jx    targ              ; MSW alone decides: take the jump
 *      Jy    ce                ; MSW alone decides: skip the LSW test
 *      CMP   lsw(e1),lsw(e2)
 *      Jcond targ
 *  ce: (nop created by gennop)
 *
 * Params:
 *      cdb    = code builder that receives the generated instructions
 *      e      = the comparison elem, operands in E1 and E2; freed on exit
 *      jcond  = when bit 0 is clear, the jump conditions are toggled
 *      fltarg = FL value describing targ, passed through to genjmp()
 *      targ   = jump target, block* or code*
 */

@trusted
void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
    // Jump tables indexed by (operator - OPle); column order is:
                                         // <=  >   <   >=
    // jopmsw: signed jump used after the MSW compare
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
    // joplsw: unsigned jump used after the LSW compare
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    // (both are variables and e2 is the one that wants to be in a register;
    // the relation is mirrored with swaprel() to compensate)
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);
    }

    // Scratch instruction used by the memory/constant operand forms
    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    // ce is appended at the very end; the "Jy" jump below targets it
    code *ce = gennop(null);
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
    // The pair of jumps that follows the MSW compare is built once here and
    // appended after the MSW compare in whichever case is selected below.
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            // General case: both operands in (distinct) register sets
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            break;

        case OPconst:
            // Prepare "CMP r/m,imm" (opcode 0x81, /7 is filled in below)
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                // Compare directly against e1 in memory
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    // Constant goes into registers; keep the index
                    // registers of the effective address intact
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                            retregs |= mES;         // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    // Re-target the same instruction at the LSW register;
                    // it is emitted after the if/else below
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,const
                    cdb.append(cdbjmp);
                    // Switch the immediate over to the LSW of the constant
                    cs.IEV2.Vint = e2.EV.Vlong;
                    freenode(e2);
                }
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                   // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;

            // e1 in registers, compared against immediate halves of e2
            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                           // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                           // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                // e1 is a sign extension that is not a common subexpression:
                // evaluate only its operand and build the MSW by hand
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                // Compare against the MSW of e2 (offset REGSIZE), then the LSW
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
                freenode(e2);
            }
            else
            {
                // e1 in registers, e2 addressed in place
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
                freenode(e2);
            }
            break;
    }

    // Jump that follows the LSW compare (unsigned condition)
    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ

    // Landing point for the "Jy" jump emitted after the MSW compare
    cdb.append(ce);
    freenode(e);
}
3353 
/*****************************
 * Do conversions.
 * Depends on OPd_s32 and CLIB.dbllng being in sequence.
 * NOTE(review): the fallback at the end of this function finds the runtime
 * routine by searching the clib[] table rather than by computing it from the
 * operator value; confirm whether the ordering dependency above still holds.
 *
 * If no result is wanted (*pretregs is 0), only the operand is evaluated.
 * When config.inline8087 is set, the switch below hands most operators to
 * x87 / XMM helpers and returns. Whatever is left is evaluated into
 * registers and converted by calling the CLIB routine paired with the
 * operator in clib[].
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the conversion elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Pairs of [conversion operator, runtime library routine]
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    // Result not wanted: just evaluate the operand
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:
                    // Complex operand: evaluate into mST01 and let
                    // fixresult_complex87 move it to where it is wanted
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                // Real operand: evaluate into mST0
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* it won't do us much good to transfer back and        */
                /* forth between 8088 registers and 8087 registers      */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    // Where does the called function return its value?
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;          // use the library routine
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    // Write the operand to offset 0 of the float temporary
                    // and a second register to offset 4, then load the 8
                    // bytes as a 64 bit integer.
                    // NOTE(review): regwithvalue() presumably leaves a
                    // register holding 0 in reg, making this a zero
                    // extension - confirm against its definition.
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(STO, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,reg,0);
                    cdb.genfltreg(STO, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s16:
            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_u16:
            Lcnvt87:
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
                // Remaining targets: pick where the operand is passed to
                // the library routine, then join the common tail at L1
                if (config.exe & EX_posix)
                {
                    retregs = mST0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                }
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    // Operand in AX (I64) or DX:AX, then call CLIB.u64_ldbl
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                // Operand in ST0, then call CLIB.ld_u64
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    // Fallback: evaluate the operand into the registers given by regmask()
    // for its type, then call the runtime routine
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    codelem(cdb,e.EV.E1,&retregs,false);
    // Linear search of clib[] for this operator; the assert fires if the
    // operator has no entry in the table
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}
3572 
3573 
/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 *
 * The strategy is picked by the chain of tests below, in this order:
 *  1. result not wanted in general registers: just evaluate the operand
 *  2. OPnp_fp, or a zero extension whose result is a register pair:
 *     operand goes in the LSW, the MSW gets a segment register or 0
 *  3. I64 OPu32_64: result in one register, upper half zeroed
 *  4. I64 OPs32_64 of a relational operator: nothing more to emit
 *  5. extensions whose result fits in one register: MOVZX/MOVSX/MOVSXD,
 *     CWDE/CDQE or AND
 *  6. flags wanted, or target CPU older than 80286: CWD/CDQ into DX:AX
 *  7. otherwise: copy the LSW into the MSW and arithmetic-shift it right
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the conversion elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Remember whether the operand is a common subexpression before it is
    // evaluated; used below to decide on the extra getregs() calls
    int e1comsub = e.EV.E1.Ecount;
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64) ||
             (I64 && op == OPu64_128)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            int segreg;

            // Choose the segment register from the operand's pointer type
            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand can be addressed directly: load it straight from
            // memory into the result register
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
                // nothing to emit in this case
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use 0x89 because that will get optimized away
                genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        // Result fits in a single register
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            // Operand is itself a byte zero extension of something
            // addressable: clear a byte register and load the byte into it,
            // doing both conversions at once
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand is addressable: extend while loading
            code cs = void;

            // When optimizing for speed on I32, OPu16_32 takes the
            // general path at L2 instead
            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            // General path: evaluate the operand, then extend in place
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);
            *pretregs &= ~mPSW;             // flags are already set
            // Generate the operand into its own builder so that its
            // instruction list (cx) can be inspected below
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                // Peephole: look at the last instruction of the operand
                cx = code_last(cx);
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);           // sign extend into DX
        cdb.gen1(0x99);                     // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64, OPs64_128
        uint msreg,lsreg;

        // Operand into the LSW register, then build the MSW from it
        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}
3810 
3811 
/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
 *
 * If the result is not wanted in a register, only the operand is
 * evaluated. For non-16-bit targets an addressable operand is extended
 * while it is loaded. Otherwise the operand is evaluated into a register
 * and extended in place; if the operand's code ends in a suitable byte AND
 * on that register, that instruction is widened instead of emitting a
 * separate extension.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the conversion elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;          // only assigned and read on the !I16 paths

    if ((*pretregs & (ALLREGS | mBP | XMMREGS)) == 0) // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,e1);
    if (!I16)
    {
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Addressable operand: extend as part of the load
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                // Optimizing for speed on CPUs before the Pentium Pro:
                // clear the register and load the low byte
                movregconst(cdb,reg,0,0);                 //  XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        // Operand must be computed: ask for it in a byte register, and let
        // its evaluation set the flags if they are wanted
        size = tysize(e.Ety);
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Generate the operand into its own builder so we can tell (via c1)
    // whether any code was produced for it
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;        // no operand code, so nothing to patch

    // If previous instruction is an AND bytereg,value
    // (for the signed conversion, only when bit 7 of the mask is clear)
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                    // convert to word operation
        c.IEV2.Vuns &= 0xFF;           // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                  // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    // On I64, registers numbered 4 and up get a REX
                    // prefix on the instruction just generated
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}
3945 
3946 
/***************************
 * Narrowing conversions that keep the low part of a value:
 *  - OP32_16:  long to short
 *  - OPoffset: offset part of a far pointer
 *  - OP16_8:   int to byte
 *  - OP64_32:  long long to long
 *  - OP128_64
 *
 * No instruction is emitted for the narrowing itself; the operand is
 * evaluated and the register mask is reduced to the part that is kept.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the conversion elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const oper = e.Eoper;

    debug
    {
        // Only the operators listed in the header may get here
        const bool expected =
            oper == OP32_16  ||
            oper == OPoffset ||
            oper == OP16_8   ||
            oper == OP64_32  ||
            oper == OP128_64;
        if (!expected)
            assert(0);
    }

    elem *operand = e.EV.E1;
    regm_t retregs;

    if (oper == OP16_8)
    {
        // Byte result: the operand has to land in a byte register
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb, operand, &retregs, false);
    }
    else if (operand.Eoper == OPrelconst)
    {
        offsetinreg(cdb, operand, &retregs);
    }
    else
    {
        retregs = *pretregs ? ALLREGS : 0;
        codelem(cdb, operand, &retregs, false);

        // Does the operand occupy a register pair on this target?
        const bool isOffset = oper == OPoffset;
        const bool pairOperand =
            I16 ||
            (I32 && (isOffset || oper == OP64_32)) ||
            (I64 && (isOffset || oper == OP128_64));
        if (pairOperand)
            retregs &= mLSW;            // keep the least significant word only
    }

    /* Giving a register the result of a new elem counts as "destroying"
     * it, even though the value in it is unchanged: the CSE bookkeeping
     * can only associate one elem with a register at a time.
     */
    if (!e.Ecount)
        useregs(retregs);
    else
        getregs(cdb, retregs);

    debug
    {
        if (*pretregs && !retregs)
        {
            printf("%s *pretregs = %s, retregs = %s, e = %p\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(retregs),e);
        }
    }

    // A wanted result must have ended up in some register
    assert(!*pretregs || retregs);
    fixresult(cdb, e, retregs, pretregs);       // lsw only
}
4014 
/**********************************************
 * OPmsw: extract the most significant half of a value held in a
 * register pair:
 *  - top 32 bits of a 64 bit value (I32)
 *  - top 16 bits of a 32 bit value (I16)
 *  - top 64 bits of a 128 bit value (I64)
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the OPmsw elem; its operand is e.EV.E1
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    // Evaluate the whole operand (or nothing, if no result is wanted)...
    regm_t msregs = 0;
    if (*pretregs)
        msregs = ALLREGS;
    codelem(cdb, e.EV.E1, &msregs, false);

    // ...and keep only the register holding the upper half
    msregs &= mMSW;

    /* Giving a register the result of a new elem counts as "destroying"
     * it, even though the value in it is unchanged: the CSE bookkeeping
     * can only associate one elem with a register at a time.
     */
    if (!e.Ecount)
        useregs(msregs);
    else
        getregs(cdb, msregs);

    debug
    {
        if (*pretregs && !msregs)
        {
            printf("%s *pretregs = %s, retregs = %s\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(msregs));
            elem_print(e);
        }
    }

    // A wanted result must have ended up in some register
    assert(!*pretregs || msregs);
    fixresult(cdb, e, msregs, pretregs);        // msw only
}
4050 
4051 
4052 
/******************************
 * Generate code for the port I/O operators OPinp and OPoutp.
 *
 * The opcode is assembled from 0xE4 by OR-ing in:
 *      0x08    port number is in DX rather than an immediate byte
 *      0x02    OUT rather than IN
 *      0x01    word rather than byte transfer
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the OPinp/OPoutp elem; E1 is the port number and, for
 *                 OPoutp, E2 is the value to write
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte opcode = 0xE4;                // root of all IN/OUT opcodes
    elem *portElem = e.EV.E1;

    // The immediate form can be used for a constant port number that fits
    // in a byte, unless the constant is wanted in a register anyway
    const bool immediatePort =
        portElem.Eoper == OPconst && portElem.EV.Vuns <= 255 &&
        (!evalinregister(portElem) || regcon.mvar & mDX);

    ubyte port = 0;                     // stays 0 for the DX form
    if (immediatePort)
    {
        port = cast(ubyte)portElem.EV.Vuns;
        freenode(portElem);
    }
    else
    {
        regm_t portRegs = mDX;          // port number is always DX
        codelem(cdb, portElem, &portRegs, false);
        opcode |= 0x08;                 // DX version of opcode
    }

    uint sz;
    if (e.Eoper != OPoutp)
    {
        // OPinp: the value read arrives in AL/AX
        getregs(cdb, mAX);
        sz = tysize(e.Ety);
    }
    else
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t dataRegs = mAX;          // byte/word to output is in AL/AX
        // If the port number is sitting in DX it must survive
        // the evaluation of the data operand
        const regm_t keep = immediatePort ? 0 : mDX;
        scodelem(cdb, e.EV.E2, &dataRegs, keep, true);
        opcode |= 0x02;                 // OUT opcode
    }

    const bool wide = sz != 1;
    if (wide)
        opcode |= 1;                    // word operation
    cdb.genc2(opcode, 0, port);         // IN/OUT AL/AX,DX/port
    if (wide && sz != REGSIZE)          // if need size override
        cdb.last().Iflags |= CFopsize;

    regm_t resultRegs = mAX;
    fixresult(cdb, e, resultRegs, pretregs);
}
4103 
/************************
 * Generate code for an asm elem: the bytes stored in the elem are
 * emitted as-is.
 *
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = the asm elem; its bytes are e.EV.Vstring[0 .. e.EV.Vstrlen]
 *      pretregs = mask of where the result is wanted
 */

@trusted
void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    const regm_t clobbered = (ALLREGS | mES) & ~fregsaved;
    getregs(cdb, clobbered);

    auto asmBytes = cast(ubyte[])e.EV.Vstring[0 .. e.EV.Vstrlen];
    cdb.genasm(asmBytes);

    // The result is taken to be in DX:AX on 16 bit targets, else in AX
    const regm_t resultRegs = I16 ? (mDX | mAX) : mAX;
    fixresult(cdb, e, resultRegs, pretregs);
}
4116 
/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 * Only used for 32 bit code generation (asserts I32).
 * Params:
 *      cdb = store generated code here
 *      e = the OPnp_f16p or OPf16p_np elem; e.EV.E1 is the operand
 *      pretregs = registers the operand is evaluated into; the conversion
 *                 is done in place in that register
 *
 * NOTE(review): the instruction sequences sketched in the comments inside
 * the body interleave the rotates with the shift/mask instructions and end
 * at a label L1. In the code as written, both rotates are generated before
 * the branch on e.Eoper, and `cnop` (the jump target) is never appended to
 * `cdb` within this function. Confirm whether that is intended.
 */

@trusted
void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop;
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    // Template instruction: opcode 0xC1 (shift/rotate group, imm8 count)
    // applied to reg; the /digit in Irm selects the actual operation.
    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    cdb.gen(&cs);                       // ROL ereg,16
    cs.Irm |= modregrm(0,1,0);
    cdb.gen(&cs);                       // ROR ereg,16
    // From here on the template is a 16 bit operation with a count of 3
    cs.IEV2.Vuns = 3;
    cs.Iflags |= CFopsize;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;run on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        // Scratch register rx: a byte register not used by the operand
        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        // When the operand is in CX, JCXZ tests it directly; otherwise
        // test the register and branch on equal
        int jop = JCXZ;
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);  // Jop L1
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                   // SHL reg,3
        genregs(cdb,0x8C,2,rx);            // MOV rx,SS
        // Despite its name, isbyte is 1 when reg is NOT in BYTEREGS; it sets
        // the low opcode bit of the instructions below
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);      // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);   // OR  regl,rl
    }
    else // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */

        cs.Irm |= modregrm(0,5,0);
        cdb.gen(&cs);                                   // SHR reg,3
        cnop = null;
    }
}
4190 
/*************************
 * Generate code for OPbtst
 *
 * Emits a BT instruction testing a bit of e1 selected by e2, then
 * materializes the carry flag as the result when a register result
 * is requested.
 * Params:
 *      cdb = store generated code here
 *      e = the OPbtst elem; e.EV.E1 is the value tested, e.EV.E2 the bit index
 *      pretregs = registers the result is wanted in; 0 means the result is
 *                 not wanted and the operands are only evaluated
 */

@trusted
void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                        // BT EA,value
    int mode = 4;                              // /digit for the 0x0FBA (imm8) form

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);  // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    // Set up the r/m operand of BT: either the EA of e1 itself, or a
    // register e1 has been evaluated into. idxregs records the registers
    // that must survive the evaluation of e2.
    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    // Operand size prefix is needed for a 16 bit operand outside 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // The constant bit index is reduced modulo the operand width
        // (16, 32 or 64 bits)
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Bit index goes in a register that does not collide with the
        // registers used by the r/m operand
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            reg_t rreg;
            if (isregvar(e2, rregm, rreg))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // BT leaves the selected bit in the carry flag; turn it into a register
    // value if one was asked for
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // regcon.immed.mval is put back to its value from before allocreg
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // Flags not wanted: reg = carry ? 1 : 0 via SBB/NEG
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);     // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Flags wanted as well: load 1, then jump over loading 0 when
                // carry is set (presumably so the flags from BT stay intact)
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                // reg holds 0 or 1 at the join, so it is no longer a known immediate
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}
4329 
/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * Emits the matching BT/BTC/BTR/BTS instruction on the addressing mode
 * obtained from `e`, with the bit index taken from e.EV.E2, then
 * materializes the carry flag as the result when a register result is
 * requested.
 * Params:
 *      cdb = store generated code here
 *      e = the OPbt/OPbtc/OPbtr/OPbts elem
 *      pretregs = registers the result is wanted in
 */

@trusted
void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // second opcode byte of the "rm,reg" form (0x0F00 | op)
    int mode;           // /digit of the "rm,imm8" form (opcode 0x0FBA)

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    // Note: the addressing mode is requested for e itself, not for e1
    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A plain bit test whose result is unused: only evaluate the index
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    // Operand size prefix is needed for a 16 bit operand outside 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the constant bit index: whole operand-sized units are folded
        // into the EA offset (in bytes), the remainder becomes the imm8
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Bit index goes in a register that does not collide with the
        // index registers of the EA
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        // Here the 64 bit form is chosen by the size of the index (e2)
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag; turn it into a register
    // value if one was asked for
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // Use tysize() rather than indexing _tysize[] with the raw e.Ety:
        // Ety may carry modifier bits on top of the basic type, which would
        // make the raw value an invalid index into the size table.
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // regcon.immed.mval is put back to its value from before allocreg
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // Flags not wanted: reg = carry ? 1 : 0 via SBB/NEG
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Flags wanted as well: load 1, then jump over loading 0 when
                // carry is set (presumably so the flags from BT stay intact)
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);    // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                // reg holds 0 or 1 at the join, so it is no longer a known immediate
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}
4448 
/*************************************
 * Generate code for OPbsf and OPbsr.
 * Params:
 *      cdb = store generated code here
 *      e = the OPbsf/OPbsr elem; e.EV.E1 is the operand that is scanned
 *      pretregs = registers the result is wanted in; 0 means the result
 *                 is not wanted and only the operand is evaluated
 */

@trusted
void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    elem* operand = e.EV.E1;

    if (*pretregs == 0)
    {
        // Nobody wants the result: just evaluate the operand
        codelem(cdb,operand,pretregs,false);
        return;
    }

    const operandTy = tybasic(operand.Ety);
    const operandSize = _tysize[operandTy];
    assert(operandSize == 2 || operandSize == 4 || operandSize == 8);

    code cs = void;
    const bool useEA = operand.Eoper == OPvar ||
                       (operand.Eoper == OPind && !operand.Ecount);
    if (useEA)
    {
        // Scan directly from the operand's addressing mode
        getlvalue(cdb, &cs, operand, RMload);
    }
    else
    {
        // Evaluate the operand into a register and use that as the r/m
        regm_t srcRegs = allregs;
        codelem(cdb,operand, &srcRegs, false);
        const srcReg = findreg(srcRegs);
        cs.Irm = modregrm(3,0,srcReg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (srcReg & 8)
            cs.Irex |= REX_B;
    }

    // Destination: one of the requested registers, or any if none fits
    regm_t dstRegs = *pretregs & allregs;
    if (dstRegs == 0)
        dstRegs = allregs;
    reg_t dstReg;
    allocreg(cdb,&dstRegs, &dstReg, e.Ety);

    if (e.Eoper == OPbsf)
        cs.Iop = 0x0FBC;                // BSF reg,EA
    else
        cs.Iop = 0x0FBD;                // BSR reg,EA
    code_newreg(&cs, dstReg);
    if (!I16 && operandSize == SHORTSIZE)
        cs.Iflags |= CFopsize;          // 16 bit operand outside 16 bit code
    cdb.gen(&cs);
    if (operandSize == 8)
        code_orrex(cdb.last(), REX_W);  // 64 bit operand

    fixresult(cdb,e,dstRegs,pretregs);
}
4501 
/************************
 * OPpopcnt operator
 *
 * Generates a POPCNT instruction; not available for 16 bit code
 * (asserts !I16).
 * Params:
 *      cdb = store generated code here
 *      e = the OPpopcnt elem; e.EV.E1 is the operand whose bits are counted
 *      pretregs = registers the result is wanted in; 0 means the result
 *                 is not wanted and only the operand is evaluated
 */

@trusted
void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);

    elem* operand = e.EV.E1;

    if (*pretregs == 0)
    {
        // Nobody wants the result: just evaluate the operand
        codelem(cdb,operand,pretregs,false);
        return;
    }

    const operandTy = tybasic(operand.Ety);
    const operandSize = _tysize[operandTy];
    assert(operandSize == 2 || operandSize == 4 || (operandSize == 8 && I64));     // no byte op

    code cs = void;
    const bool useEA = operand.Eoper == OPvar ||
                       (operand.Eoper == OPind && !operand.Ecount);
    if (useEA)
    {
        // Count directly from the operand's addressing mode
        getlvalue(cdb, &cs, operand, RMload);
    }
    else
    {
        // Evaluate the operand into a register and use that as the r/m
        regm_t srcRegs = allregs;
        codelem(cdb,operand, &srcRegs, false);
        const srcReg = findreg(srcRegs);
        cs.Irm = modregrm(3,0,srcReg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (srcReg & 8)
            cs.Irex |= REX_B;
    }

    // Destination: one of the requested registers, or any if none fits
    regm_t dstRegs = *pretregs & allregs;
    if (dstRegs == 0)
        dstRegs = allregs;
    reg_t dstReg;
    allocreg(cdb,&dstRegs, &dstReg, e.Ety);

    cs.Iop = POPCNT;                        // POPCNT reg,EA
    code_newreg(&cs, dstReg);
    if (operandSize == SHORTSIZE)
        cs.Iflags |= CFopsize;              // 16 bit operand
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;                 // caller wants the flags too
    cdb.gen(&cs);
    if (operandSize == 8)
        code_orrex(cdb.last(), REX_W);      // 64 bit operand

    // Flags are already set by the instruction, so only the register part
    // of the request is left for fixresult
    *pretregs &= mBP | ALLREGS;

    fixresult(cdb,e,dstRegs,pretregs);
}
4559 
4560 
/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * Evaluates e.EV.E1 and e.EV.E2 into the two halves of a register pair.
 * For OPrpair the register assignment of the two halves is exchanged.
 * Params:
 *      cdb = store generated code here
 *      e = the OPpair/OPrpair elem
 *      pretregs = registers the result is wanted in; 0 means the result
 *                 is not wanted and the operands are only evaluated
 */

@trusted
void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    elem* first = e.EV.E1;
    elem* second = e.EV.E2;

    if (*pretregs == 0)                         // result not wanted
    {
        codelem(cdb,first,pretregs,false);      // evaluate left operand
        *pretregs = 0;                          // in case it got set
        codelem(cdb,second,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //WRTYxx(e.Ety);printf("\n");
    //printf("Ecount = %d\n", e.Ecount);

    regm_t wanted = *pretregs;

    // Only flags requested for a complex value with inline x87: load it
    // into floating point registers (XMM0/XMM1 or the x87 pair)
    if (wanted == mPSW && tycomplex(e.Ety) && config.inline8087)
        wanted |= config.fpxmmregs ? (mXMM0 | mXMM1) : mST01;

    if (wanted & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t firstRegs;
    regm_t secondRegs;
    if (wanted & XMMREGS)
    {
        // Two distinct XMM registers out of the requested set
        wanted &= XMMREGS;
        const firstXmm = findreg(wanted);
        firstRegs = mask(firstXmm);
        secondRegs = mask(findreg(wanted & ~firstRegs));
    }
    else
    {
        // General registers: split into least/most significant halves
        wanted &= allregs;
        if (wanted == 0)
            wanted = allregs;
        firstRegs = wanted & mLSW;
        secondRegs = wanted & mMSW;
    }

    if (e.Eoper == OPrpair)
    {
        // reversed pair: exchange the two register sets
        const regm_t tmp = firstRegs;
        firstRegs = secondRegs;
        secondRegs = tmp;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(firstRegs), regm_str(secondRegs));

    codelem(cdb,first, &firstRegs, false);
    scodelem(cdb,second, &secondRegs, firstRegs, false);    // keep first half intact

    if (first.Ecount)
        getregs(cdb,firstRegs);
    if (second.Ecount)
        getregs(cdb,secondRegs);

    fixresult(cdb,e,firstRegs | secondRegs,pretregs);
}
4631 
/*************************
 * Generate code for OPcmpxchg
 *
 * Emits CMPXCHG (or CMPXCHG8B for an 8 byte lvalue in 32 bit code),
 * preceded by a LOCK prefix when the lvalue type is volatile. When a
 * register result is requested it is produced from the zero flag with SETZ.
 * Params:
 *      cdb = store generated code here
 *      e = the OPcmpxchg elem, see the tree shape below
 *      pretregs = registers the (byte sized) result is wanted in
 */

@trusted
void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                           // LOCK prefix
        cs.Iop = 0x0FC7;                              // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);                           // /1 selects CMPXCHG8B
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        // The byte form takes an 8 bit register operand, so the new value
        // must be placed in a register that has a byte form
        if (isbyte)
            retregs &= BYTEREGS;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs); // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                        // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        // In 64 bit code a byte operand in register 4..7 needs a REX prefix
        // to select its low byte (same rule as for the SETZ below)
        if (I64 && isbyte && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    // The zero flag now tells whether the exchange took place
    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}
4722 
/*************************
 * Generate code for OPprefetch
 *
 * e.EV.E1 supplies the address, e.EV.E2 must be a constant selecting
 * the kind of prefetch. No result is produced (asserts *pretregs == 0).
 * Params:
 *      cdb = store generated code here
 *      e = the OPprefetch elem
 *      pretregs = must point to 0
 */

@trusted
void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetch0
     *    1: prefetch1
     *    2: prefetch2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem* addr = e.EV.E1;
    elem* hint = e.EV.E2;

    assert(*pretregs == 0);
    assert(hint.Eoper == OPconst);

    // Map the hint onto an opcode and the reg field of the ModRM byte
    opcode_t op;
    reg_t reg;
    const which = hint.EV.Vuns;
    switch (which)
    {
        case 0, 1, 2:                       // PREFETCH0/1/2 use /1, /2, /3
            op = PREFETCH;
            reg = cast(reg_t)(which + 1);
            break;

        case 3:                             // PREFETCHNTA uses /0
            op = PREFETCH;
            reg = 0;
            break;

        case 4, 5:                          // PREFETCHW/PREFETCHWT1 use /1, /2
            op = 0x0F0D;
            reg = cast(reg_t)(which - 3);
            break;

        default:
            assert(0);
    }

    freenode(hint);

    code cs = void;
    getlvalue(cdb,&cs,addr,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}
4765 
4766 
/*********************
 * Load register from EA of assignment operation.
 *
 * Allocates a register out of `retregs` and generates a load of the
 * EA described by `cs` into it.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it; its opcode and reg
 *           field are overwritten
 *      e = assignment expression that will be evaluated
 *          (not referenced by the body)
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
@trusted
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);

    // Choose the destination among the candidates
    allocreg(cdb,&retregs,&reg,TYoffset);

    // MOV reg,EA
    cs.Iop = LOD;
    code_newreg(&cs,reg);
    cdb.gen(&cs);
}
4787 
/*********************
 * Load register pair from EA of assignment operation.
 *
 * Computes the EA of the lvalue e.EV.E1 into `cs`, allocates a register
 * pair and loads both halves. On return `cs` addresses the least
 * significant half again.
 * Params:
 *      cdb = store generated code here
 *      cs = receives the EA of the lvalue
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
@trusted
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    elem* lvalue = e.EV.E1;
    getlvalue(cdb,&cs,lvalue,retregs | keepmsk);

    // Allocate the pair according to the lvalue's basic type; the single
    // register reported by allocreg is not needed, only the updated mask
    reg_t unused;
    allocreg(cdb,&retregs,&unused,tybasic(lvalue.Ety));

    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    // MOV rlo,EA
    cs.Iop = LOD;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);

    // MOV rhi,EA+2
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);

    // Point the EA back at the least significant half
    getlvalue_lsw(&cs);
}
4819 
4820 
/*********************************************************
 * Store register result of assignment operation EA.
 *
 * Generates the store of `reg` into the EA, records the register as
 * holding the lvalue when the lvalue is a common subexpression, frees
 * the lvalue node and moves the result to where the caller wants it.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register of result
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t resultMask = mask(reg);

    // A byte sized lvalue uses the byte variant of the store opcode
    const bool byteStore = _tysize[tybasic(lvalue.Ety)] == 1;
    cs.Iop = byteStore ? (STO ^ 1) : STO;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                               // MOV EA,resreg

    if (lvalue.Ecount)                          // if we gen a CSE
        cssave(lvalue,resultMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,resultMask,pretregs);
}
4846 
/*********************************************************
 * Store register pair result of assignment operation EA.
 *
 * Stores `rlo` to the EA and `rhi` to its most significant half, records
 * the pair as holding the lvalue when the lvalue is a common
 * subexpression, frees the lvalue node and moves the result to where the
 * caller wants it. `cs` is left addressing the most significant half.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    // MOV EA,lsreg
    cs.Iop = STO;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);

    // MOV EA+REGSIZE,msreg
    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);

    elem* lvalue = e.EV.E1;
    const regm_t pairMask = mask(rhi) | mask(rlo);
    if (lvalue.Ecount)              // if we gen a CSE
        cssave(lvalue,pairMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,pairMask,pretregs);
}